xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fc9b51d31e2be1d1d6cc93760bb648055e237fa2)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically
20    switches over to use inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
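/*
   A minimal usage sketch for MATAIJ (illustrative only; error checking with CHKERRQ() is omitted
   and M, N and the preallocation counts below are placeholders):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATAIJ);
      MatSetFromOptions(A);
      MatSeqAIJSetPreallocation(A,5,NULL);            used when the communicator has a single process
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     used when the communicator has several processes
      then MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), and eventually MatDestroy(&A)
*/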
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
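/*
   MatFindNonzeroRows_MPIAIJ - builds an index set, in global row numbering, of the locally owned rows
   that contain at least one stored nonzero value in either the diagonal (A) or off-diagonal (B) block.
   If no process has an all-zero row, *keptrows is returned as NULL.
*/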
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
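/*
   MatGetColumnNorms_MPIAIJ - accumulates the requested norm of every global column into a work array
   whose length is the global number of columns, then combines the per-process contributions with
   MPI_Allreduce() (MPIU_MAX for NORM_INFINITY, MPIU_SUM otherwise); for NORM_2 the square root is
   taken at the end.  Note that every process allocates the full global-length work array.
*/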
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
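/*
   A hedged usage sketch (illustrative only; gmat is the sequential AIJ matrix, whose numerical data
   is read only on process 0, and m is this process's desired number of local rows):

      Mat dmat;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);    moves over only the new numerical values from process 0
*/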
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal entries in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal entries in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each processor
350 has an order N integer array, but access is fast).
351 */
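/*
   For example, assuming the off-diagonal block uses global columns garray = {3,7,12} (local
   off-diagonal columns 0,1,2), this routine records colmap[3] = 1, colmap[7] = 2 and colmap[12] = 3;
   values are stored shifted by one so that 0 can mean "column not present", and a later lookup of
   global column 7 therefore yields local column 2-1 = 1.
*/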
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
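/*
   MatSetValues_SeqAIJ_A_Private inserts (or adds) a single value at (row,col) of the diagonal block A:
   a short binary search narrows the column range, a linear scan then either overwrites (INSERT_VALUES)
   or increments (ADD_VALUES) an existing entry, and otherwise a new entry is created by shifting the
   later entries of the row up one slot, reallocating with MatSeqXAIJReallocateAIJ() if the row is full.
*/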
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
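/* MatSetValues_SeqAIJ_B_Private applies the same insertion logic as the A variant above, but to the
   off-diagonal block B (using the rp2/ap2 pointers and the b_noinsert label). */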
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
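/*
   MatSetValuesRow_MPIAIJ copies an entire locally owned row from v, which must hold the row's values
   ordered by increasing global column: first the off-diagonal (B) entries left of the diagonal block,
   then the diagonal (A) block entries, then the remaining off-diagonal entries.
*/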
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {
526           if (roworiented) value = v[i*n+j];
527           else             value = v[i+j*m];
528         } else value = 0.0;
529         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
530         if (in[j] >= cstart && in[j] < cend) {
531           col   = in[j] - cstart;
532           nonew = a->nonew;
533           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
534         } else if (in[j] < 0) continue;
535 #if defined(PETSC_USE_DEBUG)
536         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
537 #endif
538         else {
539           if (mat->was_assembled) {
540             if (!aij->colmap) {
541               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
542             }
543 #if defined(PETSC_USE_CTABLE)
544             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
545             col--;
546 #else
547             col = aij->colmap[in[j]] - 1;
548 #endif
549             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
550               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
551               col  =  in[j];
552               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
553               B     = aij->B;
554               b     = (Mat_SeqAIJ*)B->data;
555               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
556               rp2   = bj + bi[row];
557               ap2   = ba + bi[row];
558               rmax2 = bimax[row];
559               nrow2 = bilen[row];
560               low2  = 0;
561               high2 = nrow2;
562               bm    = aij->B->rmap->n;
563               ba    = b->a;
564             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
565           } else col = in[j];
566           nonew = b->nonew;
567           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
568         }
569       }
570     } else {
571       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
572       if (!aij->donotstash) {
573         mat->assembled = PETSC_FALSE;
574         if (roworiented) {
575           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
576         } else {
577           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         }
579       }
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatGetValues_MPIAIJ"
587 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
588 {
589   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
590   PetscErrorCode ierr;
591   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
592   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
593 
594   PetscFunctionBegin;
595   for (i=0; i<m; i++) {
596     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
597     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
598     if (idxm[i] >= rstart && idxm[i] < rend) {
599       row = idxm[i] - rstart;
600       for (j=0; j<n; j++) {
601         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
602         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
603         if (idxn[j] >= cstart && idxn[j] < cend) {
604           col  = idxn[j] - cstart;
605           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
606         } else {
607           if (!aij->colmap) {
608             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609           }
610 #if defined(PETSC_USE_CTABLE)
611           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
612           col--;
613 #else
614           col = aij->colmap[idxn[j]] - 1;
615 #endif
616           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
617           else {
618             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
619           }
620         }
621       }
622     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
631 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
632 {
633   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
634   PetscErrorCode ierr;
635   PetscInt       nstash,reallocs;
636   InsertMode     addv;
637 
638   PetscFunctionBegin;
639   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
640 
641   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
642   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
643   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
644   mat->insertmode = addv; /* in case this processor had no cache */
645 
646   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
647   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
648   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
654 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
655 {
656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
657   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
658   PetscErrorCode ierr;
659   PetscMPIInt    n;
660   PetscInt       i,j,rstart,ncols,flg;
661   PetscInt       *row,*col;
662   PetscBool      other_disassembled;
663   PetscScalar    *val;
664   InsertMode     addv = mat->insertmode;
665 
666   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
667 
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n; ) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) {
677           if (row[j] != rstart) break;
678         }
679         if (j < n) ncols = j-i;
680         else       ncols = n-i;
681         /* Now assemble all these values with a single function call */
682         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
683 
684         i = j;
685       }
686     }
687     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
688   }
689   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
690   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
691 
692   /* determine if any processor has disassembled; if so we must
693      also disassemble ourselves, in order that we may reassemble. */
694   /*
695      if the nonzero structure of the submatrix B cannot change then we know that
696      no processor disassembled, thus we can skip this step
697   */
698   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
699     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
700     if (mat->was_assembled && !other_disassembled) {
701       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
702     }
703   }
704   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
705     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
706   }
707   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
708   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
709   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
710 
711   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
712 
713   aij->rowvalues = 0;
714 
715   /* used by MatAXPY() */
716   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
717   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
718 
719   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
720   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
721 
722   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
723   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
724     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
725     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
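/*
   MatZeroRows_MPIAIJ builds a PetscSF whose leaves are the caller-supplied global rows and whose roots
   are the locally owned rows, then reduces over it to flag which local rows must be zeroed.  If both x
   and b are provided, b is first fixed so that b_i = diag*x_i on the zeroed rows.  The off-diagonal
   block B is zeroed before the diagonal block A because, in the rectangular case with a nonzero diag,
   the diagonal entries are reinserted with MatSetValues() and may land in B.
*/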
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
748   PetscInt      *owners = A->rmap->range;
749   PetscInt       n      = A->rmap->n;
750   PetscMPIInt    size   = mat->size;
751   PetscSF        sf;
752   PetscInt      *lrows;
753   PetscSFNode   *rrows;
754   PetscInt       lastidx = -1, r, p = 0, len = 0;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   /* Create SF where leaves are input rows and roots are owned rows */
759   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
760   for (r = 0; r < n; ++r) lrows[r] = -1;
761   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
762   for (r = 0; r < N; ++r) {
763     const PetscInt idx   = rows[r];
764     PetscBool      found = PETSC_FALSE;
765     /* Trick for efficient searching for sorted rows */
766     if (lastidx > idx) p = 0;
767     lastidx = idx;
768     for (; p < size; ++p) {
769       if (idx >= owners[p] && idx < owners[p+1]) {
770         rrows[r].rank  = p;
771         rrows[r].index = rows[r] - owners[p];
772         found = PETSC_TRUE;
773         break;
774       }
775     }
776     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
777   }
778   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
779   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
780   /* Collect flags for rows to be zeroed */
781   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
782   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
783   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
784   /* Compress and put in row numbers */
785   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
786   /* fix right hand side if needed */
787   if (x && b) {
788     const PetscScalar *xx;
789     PetscScalar       *bb;
790 
791     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
796   }
797   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
798   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
799   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
800     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
801   } else if (diag != 0.0) {
802     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
803     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
804     for (r = 0; r < len; ++r) {
805       const PetscInt row = lrows[r] + A->rmap->rstart;
806       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
807     }
808     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810   } else {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   }
813   ierr = PetscFree(lrows);CHKERRQ(ierr);
814 
815   /* only change matrix nonzero state if pattern was allowed to be changed */
816   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
817     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
818     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
826 {
827   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
828   PetscErrorCode    ierr;
829   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
830   PetscInt          i,j,r,m,p = 0,len = 0;
831   PetscInt          *lrows,*owners = A->rmap->range;
832   PetscSFNode       *rrows;
833   PetscSF           sf;
834   const PetscScalar *xx;
835   PetscScalar       *bb,*mask;
836   Vec               xmask,lmask;
837   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
838   const PetscInt    *aj, *ii,*ridx;
839   PetscScalar       *aa;
840 #if defined(PETSC_DEBUG)
841   PetscBool found = PETSC_FALSE;
842 #endif
843 
844   PetscFunctionBegin;
845   /* Create SF where leaves are input rows and roots are owned rows */
846   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
847   for (r = 0; r < n; ++r) lrows[r] = -1;
848   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
849   for (r = 0; r < N; ++r) {
850     const PetscInt idx   = rows[r];
851     PetscBool      found = PETSC_FALSE;
852     /* Trick for efficient searching for sorted rows */
853     if (lastidx > idx) p = 0;
854     lastidx = idx;
855     for (; p < size; ++p) {
856       if (idx >= owners[p] && idx < owners[p+1]) {
857         rrows[r].rank  = p;
858         rrows[r].index = rows[r] - owners[p];
859         found = PETSC_TRUE;
860         break;
861       }
862     }
863     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
864   }
865   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
866   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
867   /* Collect flags for rows to be zeroed */
868   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
870   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
871   /* Compress and put in row numbers */
872   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
873   /* zero diagonal part of matrix */
874   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
875   /* handle off diagonal part of matrix */
876   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
877   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
878   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
879   for (i=0; i<len; i++) bb[lrows[i]] = 1;
880   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
881   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
884   if (x) {
885     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
888     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
889   }
890   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
891   /* remove zeroed rows of off diagonal matrix */
892   ii = aij->i;
893   for (i=0; i<len; i++) {
894     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
895   }
896   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
897   if (aij->compressedrow.use) {
898     m    = aij->compressedrow.nrows;
899     ii   = aij->compressedrow.i;
900     ridx = aij->compressedrow.rindex;
901     for (i=0; i<m; i++) {
902       n  = ii[i+1] - ii[i];
903       aj = aij->j + ii[i];
904       aa = aij->a + ii[i];
905 
906       for (j=0; j<n; j++) {
907         if (PetscAbsScalar(mask[*aj])) {
908           if (b) bb[*ridx] -= *aa*xx[*aj];
909           *aa = 0.0;
910         }
911         aa++;
912         aj++;
913       }
914       ridx++;
915     }
916   } else { /* do not use compressed row format */
917     m = l->B->rmap->n;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij->a + ii[i];
922       for (j=0; j<n; j++) {
923         if (PetscAbsScalar(mask[*aj])) {
924           if (b) bb[i] -= *aa*xx[*aj];
925           *aa = 0.0;
926         }
927         aa++;
928         aj++;
929       }
930     }
931   }
932   if (x) {
933     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
934     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
935   }
936   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
937   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
938   ierr = PetscFree(lrows);CHKERRQ(ierr);
939 
940   /* only change matrix nonzero state if pattern was allowed to be changed */
941   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
942     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
943     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
944   }
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_MPIAIJ"
950 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
951 {
952   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
953   PetscErrorCode ierr;
954   PetscInt       nt;
955 
956   PetscFunctionBegin;
957   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
959   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
961   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
968 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatMultAdd_MPIAIJ"
980 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999   PetscBool      merged;
1000 
1001   PetscFunctionBegin;
1002   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   if (!merged) {
1006     /* send it on its way */
1007     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008     /* do local part */
1009     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1010     /* receive remote parts: note this assumes the values are not actually */
1011     /* added into yy until the VecScatterEnd() below */
1012     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1013   } else {
1014     /* do local part */
1015     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1016     /* send it on its way */
1017     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1018     /* the values were actually received in the Begin(), but we still need to call this no-op End() to complete the scatter */
1019     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1026 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1027 {
1028   MPI_Comm       comm;
1029   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1030   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1031   IS             Me,Notme;
1032   PetscErrorCode ierr;
1033   PetscInt       M,N,first,last,*notme,i;
1034   PetscMPIInt    size;
1035 
1036   PetscFunctionBegin;
1037   /* Easy test: check that the diagonal blocks are transposes of each other */
1038   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1039   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   /* send it on its way */
1077   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078   /* do local part */
1079   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1080   /* receive remote parts */
1081   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 /*
1086   This only works correctly for square matrices where the subblock A->A is the
1087    diagonal block
1088 */
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1092 {
1093   PetscErrorCode ierr;
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095 
1096   PetscFunctionBegin;
1097   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1098   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1099   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatScale_MPIAIJ"
1105 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1112   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatDestroy_MPIAIJ"
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1142 
1143   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1157 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1158 {
1159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1160   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1161   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1162   PetscErrorCode ierr;
1163   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1164   int            fd;
1165   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1166   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1167   PetscScalar    *column_values;
1168   PetscInt       message_count,flowcontrolcount;
1169   FILE           *file;
1170 
1171   PetscFunctionBegin;
1172   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1174   nz   = A->nz + B->nz;
1175   if (!rank) {
1176     header[0] = MAT_FILE_CLASSID;
1177     header[1] = mat->rmap->N;
1178     header[2] = mat->cmap->N;
1179 
1180     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1182     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     /* get largest number of rows any processor has */
1184     rlen  = mat->rmap->n;
1185     range = mat->rmap->range;
1186     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1187   } else {
1188     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1189     rlen = mat->rmap->n;
1190   }
1191 
1192   /* load up the local row counts */
1193   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1194   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1195 
1196   /* store the row lengths to the file */
1197   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1198   if (!rank) {
1199     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1200     for (i=1; i<size; i++) {
1201       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1202       rlen = range[i+1] - range[i];
1203       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1204       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     }
1206     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1207   } else {
1208     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1209     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1211   }
1212   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1213 
1214   /* load up the local column indices */
1215   nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
1216   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1218   cnt   = 0;
1219   for (i=0; i<mat->rmap->n; i++) {
1220     for (j=B->i[i]; j<B->i[i+1]; j++) {
1221       if ((col = garray[B->j[j]]) > cstart) break;
1222       column_indices[cnt++] = col;
1223     }
1224     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1225     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1226   }
1227   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1228 
1229   /* store the column indices to the file */
1230   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1231   if (!rank) {
1232     MPI_Status status;
1233     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1234     for (i=1; i<size; i++) {
1235       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1236       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1237       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1238       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1244     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1249 
1250   /* load up the local column values */
1251   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1252   cnt  = 0;
1253   for (i=0; i<mat->rmap->n; i++) {
1254     for (j=B->i[i]; j<B->i[i+1]; j++) {
1255       if (garray[B->j[j]] > cstart) break;
1256       column_values[cnt++] = B->a[j];
1257     }
1258     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1259     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1260   }
1261   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1262 
1263   /* store the column values to the file */
1264   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1265   if (!rank) {
1266     MPI_Status status;
1267     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1268     for (i=1; i<size; i++) {
1269       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1270       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1271       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1272       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1274     }
1275     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1276   } else {
1277     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1278     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1281   }
1282   ierr = PetscFree(column_values);CHKERRQ(ierr);
1283 
1284   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1285   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #include <petscdraw.h>
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1292 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1293 {
1294   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank = aij->rank,size = aij->size;
1297   PetscBool         isdraw,iascii,isbinary;
1298   PetscViewer       sviewer;
1299   PetscViewerFormat format;
1300 
1301   PetscFunctionBegin;
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1303   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1304   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1305   if (iascii) {
1306     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1307     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1308       MatInfo   info;
1309       PetscBool inodes;
1310 
1311       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1312       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1313       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1315       if (!inodes) {
1316         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1317                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1318       } else {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1321       }
1322       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1323       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1324       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1329       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1332       PetscInt inodecount,inodelimit,*inodes;
1333       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1334       if (inodes) {
1335         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1336       } else {
1337         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1338       }
1339       PetscFunctionReturn(0);
1340     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1341       PetscFunctionReturn(0);
1342     }
1343   } else if (isbinary) {
1344     if (size == 1) {
1345       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1346       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1347     } else {
1348       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1349     }
1350     PetscFunctionReturn(0);
1351   } else if (isdraw) {
1352     PetscDraw draw;
1353     PetscBool isnull;
1354     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1355     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1356   }
1357 
1358   if (size == 1) {
1359     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1360     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1361   } else {
1362     /* assemble the entire matrix onto first processor. */
1363     Mat        A;
1364     Mat_SeqAIJ *Aloc;
1365     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1366     MatScalar  *a;
1367 
1368     if (mat->rmap->N > 1024) {
1369       PetscBool flg = PETSC_FALSE;
1370 
1371       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1372       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1373     }
1374 
1375     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1376     if (!rank) {
1377       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1378     } else {
1379       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1380     }
1381     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1382     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1383     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1384     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1385     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1386 
1387     /* copy over the A part */
1388     Aloc = (Mat_SeqAIJ*)aij->A->data;
1389     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1390     row  = mat->rmap->rstart;
1391     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1392     for (i=0; i<m; i++) {
1393       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1394       row++;
1395       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1396     }
1397     aj = Aloc->j;
1398     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1399 
1400     /* copy over the B part */
1401     Aloc = (Mat_SeqAIJ*)aij->B->data;
1402     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1403     row  = mat->rmap->rstart;
1404     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1405     ct   = cols;
1406     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1407     for (i=0; i<m; i++) {
1408       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1409       row++;
1410       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1411     }
1412     ierr = PetscFree(ct);CHKERRQ(ierr);
1413     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415     /*
1416        Everyone has to participate in the call that draws the matrix, since the graphics
1417        waits are synchronized across all processes that share the PetscDraw object
1418     */
1419     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1420     if (!rank) {
1421       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1422       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1423       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1424       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1425     }
1426     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1427     ierr = MatDestroy(&A);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
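/*
   MatView_MPIAIJ() below is reached through the generic MatView() interface; a minimal usage
   sketch (assuming mat is an assembled MATMPIAIJ matrix) is

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, equivalently, the -mat_view option database key.
*/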
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatView_MPIAIJ"
1434 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1435 {
1436   PetscErrorCode ierr;
1437   PetscBool      iascii,isdraw,issocket,isbinary;
1438 
1439   PetscFunctionBegin;
1440   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1444   if (iascii || isdraw || isbinary || issocket) {
1445     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
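/*
   SOR for MPIAIJ is built from local sweeps on the diagonal block mat->A: before each sweep the
   current iterate is scattered into mat->lvec and the off-process coupling is folded into the
   right-hand side (bb1 = bb - B*x).  Only the SOR_LOCAL_* variants, SOR_APPLY_UPPER, and
   Eisenstat's trick are handled; a genuinely parallel sweep raises an error.
*/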
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatSOR_MPIAIJ"
1452 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1453 {
1454   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1455   PetscErrorCode ierr;
1456   Vec            bb1 = 0;
1457   PetscBool      hasop;
1458 
1459   PetscFunctionBegin;
1460   if (flag == SOR_APPLY_UPPER) {
1461     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1462     PetscFunctionReturn(0);
1463   }
1464 
1465   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1466     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1467   }
1468 
1469   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1470     if (flag & SOR_ZERO_INITIAL_GUESS) {
1471       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1472       its--;
1473     }
1474 
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1487     if (flag & SOR_ZERO_INITIAL_GUESS) {
1488       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489       its--;
1490     }
1491     while (its--) {
1492       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494 
1495       /* update rhs: bb1 = bb - B*x */
1496       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1497       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1498 
1499       /* local sweep */
1500       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1501     }
1502   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1503     if (flag & SOR_ZERO_INITIAL_GUESS) {
1504       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1505       its--;
1506     }
1507     while (its--) {
1508       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510 
1511       /* update rhs: bb1 = bb - B*x */
1512       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1513       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1514 
1515       /* local sweep */
1516       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1517     }
1518   } else if (flag & SOR_EISENSTAT) {
1519     Vec xx1;
1520 
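    /*
       Eisenstat's trick: one local backward sweep produces xx, then a modified right-hand side is
       assembled from the diagonal (scaled by (omega-2)/omega), bb, and the off-process coupling
       B*lvec; a final local forward sweep on that right-hand side yields xx1, which is added to xx.
    */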
1521     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1522     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1523 
1524     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1526     if (!mat->diag) {
1527       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1528       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1529     }
1530     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1531     if (hasop) {
1532       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1533     } else {
1534       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1535     }
1536     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1537 
1538     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1539 
1540     /* local sweep */
1541     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1542     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1543     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1544   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1545 
1546   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatPermute_MPIAIJ"
1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1553 {
1554   Mat            aA,aB,Aperm;
1555   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1556   PetscScalar    *aa,*ba;
1557   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1558   PetscSF        rowsf,sf;
1559   IS             parcolp = NULL;
1560   PetscBool      done;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1565   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1566   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1567   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1568 
1569   /* Invert row permutation to find out where my rows should go */
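  /* (each leaf i of rowsf points at global row rwant[i]; reducing work[i] = rstart+i onto the
      roots therefore deposits, for every locally owned row, the global index it is mapped to,
      i.e. rdest[r] = the destination of local row r under the inverse permutation) */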
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1574   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1576 
1577   /* Invert column permutation to find out where my columns should go */
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1582   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1583   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1585 
1586   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1588   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1589 
1590   /* Find out where my gcols should go */
1591   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1592   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1593   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1594   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1595   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1596   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1597   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1598   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1599 
1600   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1601   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1602   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1603   for (i=0; i<m; i++) {
1604     PetscInt row = rdest[i],rowner;
1605     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1606     for (j=ai[i]; j<ai[i+1]; j++) {
1607       PetscInt cowner,col = cdest[aj[j]];
1608       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1609       if (rowner == cowner) dnnz[i]++;
1610       else onnz[i]++;
1611     }
1612     for (j=bi[i]; j<bi[i+1]; j++) {
1613       PetscInt cowner,col = gcdest[bj[j]];
1614       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1615       if (rowner == cowner) dnnz[i]++;
1616       else onnz[i]++;
1617     }
1618   }
1619   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1620   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1624 
1625   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1626   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1627   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1630     PetscInt j0,rowlen;
1631     rowlen = ai[i+1] - ai[i];
1632     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the size of the repurposed work arrays), so insert in batches */
1633       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1634       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1635     }
1636     rowlen = bi[i+1] - bi[i];
1637     for (j0=j=0; j<rowlen; j0=j) {
1638       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1639       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1640     }
1641   }
1642   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1645   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1646   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1647   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1648   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1650   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1651   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1652   *B = Aperm;
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1658 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1659 {
1660   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1661   Mat            A    = mat->A,B = mat->B;
1662   PetscErrorCode ierr;
1663   PetscReal      isend[5],irecv[5];
1664 
1665   PetscFunctionBegin;
1666   info->block_size = 1.0;
1667   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1668 
1669   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1670   isend[3] = info->memory;  isend[4] = info->mallocs;
1671 
1672   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1673 
1674   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1675   isend[3] += info->memory;  isend[4] += info->mallocs;
1676   if (flag == MAT_LOCAL) {
1677     info->nz_used      = isend[0];
1678     info->nz_allocated = isend[1];
1679     info->nz_unneeded  = isend[2];
1680     info->memory       = isend[3];
1681     info->mallocs      = isend[4];
1682   } else if (flag == MAT_GLOBAL_MAX) {
1683     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1684 
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   } else if (flag == MAT_GLOBAL_SUM) {
1691     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1692 
1693     info->nz_used      = irecv[0];
1694     info->nz_allocated = irecv[1];
1695     info->nz_unneeded  = irecv[2];
1696     info->memory       = irecv[3];
1697     info->mallocs      = irecv[4];
1698   }
1699   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1700   info->fill_ratio_needed = 0;
1701   info->factor_mallocs    = 0;
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatSetOption_MPIAIJ"
1707 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1708 {
1709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   switch (op) {
1714   case MAT_NEW_NONZERO_LOCATIONS:
1715   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1716   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1717   case MAT_KEEP_NONZERO_PATTERN:
1718   case MAT_NEW_NONZERO_LOCATION_ERR:
1719   case MAT_USE_INODES:
1720   case MAT_IGNORE_ZERO_ENTRIES:
1721     MatCheckPreallocated(A,1);
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_ROW_ORIENTED:
1726     a->roworiented = flg;
1727 
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_NEW_DIAGONALS:
1732     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1733     break;
1734   case MAT_IGNORE_OFF_PROC_ENTRIES:
1735     a->donotstash = flg;
1736     break;
1737   case MAT_SPD:
1738     A->spd_set = PETSC_TRUE;
1739     A->spd     = flg;
1740     if (flg) {
1741       A->symmetric                  = PETSC_TRUE;
1742       A->structurally_symmetric     = PETSC_TRUE;
1743       A->symmetric_set              = PETSC_TRUE;
1744       A->structurally_symmetric_set = PETSC_TRUE;
1745     }
1746     break;
1747   case MAT_SYMMETRIC:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_STRUCTURALLY_SYMMETRIC:
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_HERMITIAN:
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SYMMETRY_ETERNAL:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   default:
1760     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatGetRow_MPIAIJ"
1767 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1768 {
1769   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1770   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1771   PetscErrorCode ierr;
1772   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1773   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1774   PetscInt       *cmap,*idx_p;
1775 
1776   PetscFunctionBegin;
1777   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1778   mat->getrowactive = PETSC_TRUE;
1779 
1780   if (!mat->rowvalues && (idx || v)) {
1781     /*
1782         allocate enough space to hold information from the longest row.
1783     */
1784     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1785     PetscInt   max = 1,tmp;
1786     for (i=0; i<matin->rmap->n; i++) {
1787       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1788       if (max < tmp) max = tmp;
1789     }
1790     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1791   }
1792 
1793   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1794   lrow = row - rstart;
1795 
1796   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1797   if (!v)   {pvA = 0; pvB = 0;}
1798   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1799   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1800   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1801   nztot = nzA + nzB;
1802 
1803   cmap = mat->garray;
1804   if (v  || idx) {
1805     if (nztot) {
1806       /* Sort by increasing column numbers, assuming A and B already sorted */
1807       PetscInt imark = -1;
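      /* imark counts the off-diagonal (B) entries whose global column lies left of the diagonal
         block; those come first, followed by the diagonal-block (A) entries shifted by cstart,
         and finally the remaining B entries, giving one row in ascending global column order */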
1808       if (v) {
1809         *v = v_p = mat->rowvalues;
1810         for (i=0; i<nzB; i++) {
1811           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1812           else break;
1813         }
1814         imark = i;
1815         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1816         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1817       }
1818       if (idx) {
1819         *idx = idx_p = mat->rowindices;
1820         if (imark > -1) {
1821           for (i=0; i<imark; i++) {
1822             idx_p[i] = cmap[cworkB[i]];
1823           }
1824         } else {
1825           for (i=0; i<nzB; i++) {
1826             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1827             else break;
1828           }
1829           imark = i;
1830         }
1831         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1832         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1833       }
1834     } else {
1835       if (idx) *idx = 0;
1836       if (v)   *v   = 0;
1837     }
1838   }
1839   *nz  = nztot;
1840   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1841   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1847 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1848 {
1849   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1850 
1851   PetscFunctionBegin;
1852   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1853   aij->getrowactive = PETSC_FALSE;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatNorm_MPIAIJ"
1859 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1860 {
1861   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1862   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1863   PetscErrorCode ierr;
1864   PetscInt       i,j,cstart = mat->cmap->rstart;
1865   PetscReal      sum = 0.0;
1866   MatScalar      *v;
1867 
1868   PetscFunctionBegin;
1869   if (aij->size == 1) {
1870     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1871   } else {
1872     if (type == NORM_FROBENIUS) {
1873       v = amat->a;
1874       for (i=0; i<amat->nz; i++) {
1875         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1876       }
1877       v = bmat->a;
1878       for (i=0; i<bmat->nz; i++) {
1879         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1880       }
1881       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1882       *norm = PetscSqrtReal(*norm);
1883     } else if (type == NORM_1) { /* max column norm */
1884       PetscReal *tmp,*tmp2;
1885       PetscInt  *jj,*garray = aij->garray;
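      /* tmp[] accumulates this process's absolute column sums: diagonal-block columns are offset
         by cstart and off-diagonal columns are mapped to global numbering through garray; the
         sums are then added across processes and the largest entry is the 1-norm */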
1886       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1887       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1888       *norm = 0.0;
1889       v     = amat->a; jj = amat->j;
1890       for (j=0; j<amat->nz; j++) {
1891         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1892       }
1893       v = bmat->a; jj = bmat->j;
1894       for (j=0; j<bmat->nz; j++) {
1895         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1896       }
1897       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       for (j=0; j<mat->cmap->N; j++) {
1899         if (tmp2[j] > *norm) *norm = tmp2[j];
1900       }
1901       ierr = PetscFree(tmp);CHKERRQ(ierr);
1902       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1903     } else if (type == NORM_INFINITY) { /* max row norm */
1904       PetscReal ntemp = 0.0;
1905       for (j=0; j<aij->A->rmap->n; j++) {
1906         v   = amat->a + amat->i[j];
1907         sum = 0.0;
1908         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1909           sum += PetscAbsScalar(*v); v++;
1910         }
1911         v = bmat->a + bmat->i[j];
1912         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1913           sum += PetscAbsScalar(*v); v++;
1914         }
1915         if (sum > ntemp) ntemp = sum;
1916       }
1917       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1918     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatTranspose_MPIAIJ"
1925 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1926 {
1927   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1928   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1929   PetscErrorCode ierr;
1930   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1931   PetscInt       cstart = A->cmap->rstart,ncol;
1932   Mat            B;
1933   MatScalar      *array;
1934 
1935   PetscFunctionBegin;
1936   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1937 
1938   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1939   ai = Aloc->i; aj = Aloc->j;
1940   bi = Bloc->i; bj = Bloc->j;
1941   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1942     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1943     PetscSFNode          *oloc;
1944     PETSC_UNUSED PetscSF sf;
1945 
1946     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
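    /*
       Preallocation for the transpose: d_nnz[c] counts the A-part entries in local column c (these
       stay in the diagonal block of the transpose), while the counts g_nnz of the B-part columns
       are summed onto their owning processes through a PetscSF to obtain o_nnz.
    */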
1947     /* compute d_nnz for preallocation */
1948     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1949     for (i=0; i<ai[ma]; i++) {
1950       d_nnz[aj[i]]++;
1951       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1952     }
1953     /* compute local off-diagonal contributions */
1954     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1955     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1956     /* map those to global */
1957     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1958     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1959     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1960     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1961     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1962     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1963     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1964 
1965     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1966     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1967     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1968     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1969     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1970     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1971   } else {
1972     B    = *matout;
1973     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1974     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1975   }
1976 
1977   /* copy over the A part */
1978   array = Aloc->a;
1979   row   = A->rmap->rstart;
1980   for (i=0; i<ma; i++) {
1981     ncol = ai[i+1]-ai[i];
1982     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1983     row++;
1984     array += ncol; aj += ncol;
1985   }
1986   aj = Aloc->j;
1987   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1988 
1989   /* copy over the B part */
1990   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1991   array = Bloc->a;
1992   row   = A->rmap->rstart;
1993   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1994   cols_tmp = cols;
1995   for (i=0; i<mb; i++) {
1996     ncol = bi[i+1]-bi[i];
1997     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1998     row++;
1999     array += ncol; cols_tmp += ncol;
2000   }
2001   ierr = PetscFree(cols);CHKERRQ(ierr);
2002 
2003   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2004   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2006     *matout = B;
2007   } else {
2008     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2015 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2016 {
2017   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2018   Mat            a    = aij->A,b = aij->B;
2019   PetscErrorCode ierr;
2020   PetscInt       s1,s2,s3;
2021 
2022   PetscFunctionBegin;
2023   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2024   if (rr) {
2025     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2026     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2027     /* Overlap communication with computation. */
2028     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2029   }
2030   if (ll) {
2031     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2032     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2033     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2034   }
2035   /* scale  the diagonal block */
2036   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2037 
2038   if (rr) {
2039     /* Do a scatter end and then right scale the off-diagonal block */
2040     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2041     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2048 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2049 {
2050   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatEqual_MPIAIJ"
2060 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2061 {
2062   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2063   Mat            a,b,c,d;
2064   PetscBool      flg;
2065   PetscErrorCode ierr;
2066 
2067   PetscFunctionBegin;
2068   a = matA->A; b = matA->B;
2069   c = matB->A; d = matB->B;
2070 
2071   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2072   if (flg) {
2073     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2074   }
2075   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatCopy_MPIAIJ"
2081 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2082 {
2083   PetscErrorCode ierr;
2084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2085   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2086 
2087   PetscFunctionBegin;
2088   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2089   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2090     /* because of the column compression in the off-processor part of the matrix a->B,
2091        the number of columns in a->B and b->B may be different, hence we cannot call
2092        the MatCopy() directly on the two parts. If need be, we can provide a more
2093        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2094        then copying the submatrices */
2095     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2096   } else {
2097     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2098     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2099   }
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatSetUp_MPIAIJ"
2105 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2106 {
2107   PetscErrorCode ierr;
2108 
2109   PetscFunctionBegin;
2110   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2116 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2117 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2118 {
2119   PetscInt       i,m=Y->rmap->N;
2120   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2121   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2122   const PetscInt *xi = x->i,*yi = y->i;
2123 
2124   PetscFunctionBegin;
2125   /* Set the number of nonzeros in the new matrix */
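  /* nnz[i] is the size of the union of the (sorted) column sets of row i of X and row i of Y,
     compared in global numbering via xltog/yltog and computed with a two-pointer merge */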
2126   for (i=0; i<m; i++) {
2127     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2128     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2129     nnz[i] = 0;
2130     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2131       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2132       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2133       nnz[i]++;
2134     }
2135     for (; k<nzy; k++) nnz[i]++;
2136   }
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatAXPY_MPIAIJ"
2142 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2143 {
2144   PetscErrorCode ierr;
2145   PetscInt       i;
2146   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2147   PetscBLASInt   bnz,one=1;
2148   Mat_SeqAIJ     *x,*y;
2149 
2150   PetscFunctionBegin;
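  /*
     SAME_NONZERO_PATTERN: the value arrays of the diagonal and off-diagonal blocks line up
     entry-for-entry, so a BLAS axpy on the raw arrays suffices.  SUBSET_NONZERO_PATTERN: an
     x-to-y index map is built (and cached) for the off-diagonal block.  Otherwise a new matrix
     with the union sparsity pattern is preallocated and Y is replaced by it.
  */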
2151   if (str == SAME_NONZERO_PATTERN) {
2152     PetscScalar alpha = a;
2153     x    = (Mat_SeqAIJ*)xx->A->data;
2154     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2155     y    = (Mat_SeqAIJ*)yy->A->data;
2156     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2157     x    = (Mat_SeqAIJ*)xx->B->data;
2158     y    = (Mat_SeqAIJ*)yy->B->data;
2159     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2160     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2161   } else if (str == SUBSET_NONZERO_PATTERN) {
2162     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2163 
2164     x = (Mat_SeqAIJ*)xx->B->data;
2165     y = (Mat_SeqAIJ*)yy->B->data;
2166     if (y->xtoy && y->XtoY != xx->B) {
2167       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2168       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2169     }
2170     if (!y->xtoy) { /* get xtoy */
2171       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2172       y->XtoY = xx->B;
2173       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2174     }
2175     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2176   } else {
2177     Mat      B;
2178     PetscInt *nnz_d,*nnz_o;
2179     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2180     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2181     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2182     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2183     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2184     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2185     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2186     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2187     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2188     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2189     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2190     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2191     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2192     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2193   }
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatConjugate_MPIAIJ"
2201 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2202 {
2203 #if defined(PETSC_USE_COMPLEX)
2204   PetscErrorCode ierr;
2205   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2209   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2210 #else
2211   PetscFunctionBegin;
2212 #endif
2213   PetscFunctionReturn(0);
2214 }
2215 
2216 #undef __FUNCT__
2217 #define __FUNCT__ "MatRealPart_MPIAIJ"
2218 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2219 {
2220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2221   PetscErrorCode ierr;
2222 
2223   PetscFunctionBegin;
2224   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2225   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2231 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2232 {
2233   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2234   PetscErrorCode ierr;
2235 
2236   PetscFunctionBegin;
2237   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2238   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 #if defined(PETSC_HAVE_PBGL)
2243 
2244 #include <boost/parallel/mpi/bsp_process_group.hpp>
2245 #include <boost/graph/distributed/ilu_default_graph.hpp>
2246 #include <boost/graph/distributed/ilu_0_block.hpp>
2247 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2248 #include <boost/graph/distributed/petsc/interface.hpp>
2249 #include <boost/multi_array.hpp>
2250 #include <boost/parallel/distributed_property_map.hpp>
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2254 /*
2255   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2256 */
2257 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2258 {
2259   namespace petsc = boost::distributed::petsc;
2260 
2261   namespace graph_dist = boost::graph::distributed;
2262   using boost::graph::distributed::ilu_default::process_group_type;
2263   using boost::graph::ilu_permuted;
2264 
2265   PetscBool      row_identity, col_identity;
2266   PetscContainer c;
2267   PetscInt       m, n, M, N;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2272   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2273   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2274   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2275 
2276   process_group_type pg;
2277   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2278   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2279   lgraph_type& level_graph = *lgraph_p;
2280   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2281 
2282   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2283   ilu_permuted(level_graph);
2284 
2285   /* put together the new matrix */
2286   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2287   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2288   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2289   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2290   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2291   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2292   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2293   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2294 
2295   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2296   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2297   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2298   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2304 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2305 {
2306   PetscFunctionBegin;
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatSolve_MPIAIJ"
2312 /*
2313   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2314 */
2315 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2316 {
2317   namespace graph_dist = boost::graph::distributed;
2318 
2319   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2320   lgraph_type    *lgraph_p;
2321   PetscContainer c;
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2326   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2327   ierr = VecCopy(b, x);CHKERRQ(ierr);
2328 
2329   PetscScalar *array_x;
2330   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2331   PetscInt sx;
2332   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2333 
2334   PetscScalar *array_b;
2335   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2336   PetscInt sb;
2337   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2338 
2339   lgraph_type& level_graph = *lgraph_p;
2340   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2341 
2342   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2343   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2344   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2345 
2346   typedef boost::iterator_property_map<array_ref_type::iterator,
2347                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2348   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2349   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2350 
2351   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2352   PetscFunctionReturn(0);
2353 }
2354 #endif
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatDestroy_MatRedundant"
2358 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2359 {
2360   PetscErrorCode ierr;
2361   Mat_Redundant  *redund;
2362   PetscInt       i;
2363   PetscMPIInt    size;
2364 
2365   PetscFunctionBegin;
2366   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2367   if (size == 1) {
2368     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2369     redund = a->redundant;
2370   } else {
2371     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2372     redund = a->redundant;
2373   }
2374   if (redund){
2375     if (redund->matseq) { /* via MatGetSubMatrices()  */
2376       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2377       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2378       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2379       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2380     } else {
2381       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2382       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2383       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2384       for (i=0; i<redund->nrecvs; i++) {
2385         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2386         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2387       }
2388       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2389     }
2390 
2391     if (redund->psubcomm) {
2392       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2393     }
2394     ierr = redund->Destroy(A);CHKERRQ(ierr);
2395     ierr = PetscFree(redund);CHKERRQ(ierr);
2396   }
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2402 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2403 {
2404   PetscMPIInt    rank,size;
2405   MPI_Comm       comm;
2406   PetscErrorCode ierr;
2407   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2408   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2409   PetscInt       *rowrange = mat->rmap->range;
2410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2411   Mat            A = aij->A,B=aij->B,C=*matredundant;
2412   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2413   PetscScalar    *sbuf_a;
2414   PetscInt       nzlocal=a->nz+b->nz;
2415   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2416   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2417   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2418   MatScalar      *aworkA,*aworkB;
2419   PetscScalar    *vals;
2420   PetscMPIInt    tag1,tag2,tag3,imdex;
2421   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2422   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2423   MPI_Status     recv_status,*send_status;
2424   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2425   PetscInt       **rbuf_j=NULL;
2426   PetscScalar    **rbuf_a=NULL;
2427   Mat_Redundant  *redund =NULL;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2431   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2433   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2434   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2435 
2436   if (reuse == MAT_REUSE_MATRIX) {
2437     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2438     if (subsize == 1) {
2439       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2440       redund = c->redundant;
2441     } else {
2442       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2443       redund = c->redundant;
2444     }
2445     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2446 
2447     nsends    = redund->nsends;
2448     nrecvs    = redund->nrecvs;
2449     send_rank = redund->send_rank;
2450     recv_rank = redund->recv_rank;
2451     sbuf_nz   = redund->sbuf_nz;
2452     rbuf_nz   = redund->rbuf_nz;
2453     sbuf_j    = redund->sbuf_j;
2454     sbuf_a    = redund->sbuf_a;
2455     rbuf_j    = redund->rbuf_j;
2456     rbuf_a    = redund->rbuf_a;
2457   }
2458 
2459   if (reuse == MAT_INITIAL_MATRIX) {
2460     PetscInt    nleftover,np_subcomm;
2461 
2462     /* get the destination processors' id send_rank, nsends and nrecvs */
2463     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2464 
2465     np_subcomm = size/nsubcomm;
2466     nleftover  = size - nsubcomm*np_subcomm;
2467 
2468     /* the block of code below is specific to the INTERLACED layout */
2469     /* ------------------------------------------------*/
2470     nsends = 0; nrecvs = 0;
2471     for (i=0; i<size; i++) {
2472       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2473         send_rank[nsends++] = i;
2474         recv_rank[nrecvs++] = i;
2475       }
2476     }
2477     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2478       i = size-nleftover-1;
2479       j = 0;
2480       while (j < nsubcomm - nleftover) {
2481         send_rank[nsends++] = i;
2482         i--; j++;
2483       }
2484     }
2485 
2486     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2487       for (i=0; i<nleftover; i++) {
2488         recv_rank[nrecvs++] = size-nleftover+i;
2489       }
2490     }
2491     /*----------------------------------------------*/
2492 
2493     /* allocate sbuf_j, sbuf_a */
2494     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2495     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2496     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2497     /*
2498     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2499     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2500      */
2501   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2502 
2503   /* copy mat's local entries into the buffers */
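  /*
     sbuf_j is packed as [row pointers rptr[0..nrows]] followed by the global column indices of
     every local row (lower off-diagonal, then diagonal-block, then upper off-diagonal entries,
     i.e. in ascending global column order); sbuf_a holds the values in the same order.
  */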
2504   if (reuse == MAT_INITIAL_MATRIX) {
2505     rownz_max = 0;
2506     rptr      = sbuf_j;
2507     cols      = sbuf_j + rend-rstart + 1;
2508     vals      = sbuf_a;
2509     rptr[0]   = 0;
2510     for (i=0; i<rend-rstart; i++) {
2511       row    = i + rstart;
2512       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2513       ncols  = nzA + nzB;
2514       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2515       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2516       /* load the column indices for this row into cols */
2517       lwrite = 0;
2518       for (l=0; l<nzB; l++) {
2519         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2520           vals[lwrite]   = aworkB[l];
2521           cols[lwrite++] = ctmp;
2522         }
2523       }
2524       for (l=0; l<nzA; l++) {
2525         vals[lwrite]   = aworkA[l];
2526         cols[lwrite++] = cstart + cworkA[l];
2527       }
2528       for (l=0; l<nzB; l++) {
2529         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2530           vals[lwrite]   = aworkB[l];
2531           cols[lwrite++] = ctmp;
2532         }
2533       }
2534       vals     += ncols;
2535       cols     += ncols;
2536       rptr[i+1] = rptr[i] + ncols;
2537       if (rownz_max < ncols) rownz_max = ncols;
2538     }
2539     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2540   } else { /* only copy matrix values into sbuf_a */
2541     rptr    = sbuf_j;
2542     vals    = sbuf_a;
2543     rptr[0] = 0;
2544     for (i=0; i<rend-rstart; i++) {
2545       row    = i + rstart;
2546       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2547       ncols  = nzA + nzB;
2548       cworkB = b->j + b->i[i];
2549       aworkA = a->a + a->i[i];
2550       aworkB = b->a + b->i[i];
2551       lwrite = 0;
2552       for (l=0; l<nzB; l++) {
2553         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2554       }
2555       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2556       for (l=0; l<nzB; l++) {
2557         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2558       }
2559       vals     += ncols;
2560       rptr[i+1] = rptr[i] + ncols;
2561     }
2562   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2563 
2564   /* send nzlocal to others, and recv other's nzlocal */
2565   /*--------------------------------------------------*/
2566   if (reuse == MAT_INITIAL_MATRIX) {
2567     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2568 
2569     s_waits2 = s_waits3 + nsends;
2570     s_waits1 = s_waits2 + nsends;
2571     r_waits1 = s_waits1 + nsends;
2572     r_waits2 = r_waits1 + nrecvs;
2573     r_waits3 = r_waits2 + nrecvs;
2574   } else {
2575     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2576 
2577     r_waits3 = s_waits3 + nsends;
2578   }
2579 
2580   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2581   if (reuse == MAT_INITIAL_MATRIX) {
2582     /* get new tags to keep the communication clean */
2583     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2584     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2585     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2586 
2587     /* post receives of other's nzlocal */
2588     for (i=0; i<nrecvs; i++) {
2589       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2590     }
2591     /* send nzlocal to others */
2592     for (i=0; i<nsends; i++) {
2593       sbuf_nz[i] = nzlocal;
2594       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2595     }
2596     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2597     count = nrecvs;
2598     while (count) {
2599       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2600 
2601       recv_rank[imdex] = recv_status.MPI_SOURCE;
2602       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2603       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2604 
2605       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2606 
2607       rbuf_nz[imdex] += i + 2;
2608 
2609       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2610       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2611       count--;
2612     }
2613     /* wait on sends of nzlocal */
2614     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2615     /* send mat->i,j to others, and recv from others */
2616     /*------------------------------------------------*/
2617     for (i=0; i<nsends; i++) {
2618       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2619       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2620     }
2621     /* wait on receives of mat->i,j */
2622     /*------------------------------*/
2623     count = nrecvs;
2624     while (count) {
2625       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2626       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2627       count--;
2628     }
2629     /* wait on sends of mat->i,j */
2630     /*---------------------------*/
2631     if (nsends) {
2632       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2633     }
2634   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2635 
2636   /* post receives, send and receive mat->a */
2637   /*----------------------------------------*/
2638   for (imdex=0; imdex<nrecvs; imdex++) {
2639     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2640   }
2641   for (i=0; i<nsends; i++) {
2642     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2643   }
2644   count = nrecvs;
2645   while (count) {
2646     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2647     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2648     count--;
2649   }
2650   if (nsends) {
2651     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2652   }
2653 
2654   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2655 
2656   /* create redundant matrix */
2657   /*-------------------------*/
2658   if (reuse == MAT_INITIAL_MATRIX) {
2659     const PetscInt *range;
2660     PetscInt       rstart_sub,rend_sub,mloc_sub;
2661 
2662     /* compute rownz_max for preallocation */
2663     for (imdex=0; imdex<nrecvs; imdex++) {
2664       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2665       rptr = rbuf_j[imdex];
2666       for (i=0; i<j; i++) {
2667         ncols = rptr[i+1] - rptr[i];
2668         if (rownz_max < ncols) rownz_max = ncols;
2669       }
2670     }
2671 
2672     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2673 
2674     /* get local size of redundant matrix
2675        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2676     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2677     rstart_sub = range[nsubcomm*subrank];
2678     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2679       rend_sub = range[nsubcomm*(subrank+1)];
2680     } else {
2681       rend_sub = mat->rmap->N;
2682     }
2683     mloc_sub = rend_sub - rstart_sub;
2684 
2685     if (M == N) {
2686       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2687     } else { /* non-square matrix */
2688       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2689     }
2690     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2691     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2692     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2693     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2694   } else {
2695     C = *matredundant;
2696   }
2697 
2698   /* insert local matrix entries */
2699   rptr = sbuf_j;
2700   cols = sbuf_j + rend-rstart + 1;
2701   vals = sbuf_a;
2702   for (i=0; i<rend-rstart; i++) {
2703     row   = i + rstart;
2704     ncols = rptr[i+1] - rptr[i];
2705     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2706     vals += ncols;
2707     cols += ncols;
2708   }
2709   /* insert received matrix entries */
2710   for (imdex=0; imdex<nrecvs; imdex++) {
2711     rstart = rowrange[recv_rank[imdex]];
2712     rend   = rowrange[recv_rank[imdex]+1];
2713     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2714     rptr   = rbuf_j[imdex];
2715     cols   = rbuf_j[imdex] + rend-rstart + 1;
2716     vals   = rbuf_a[imdex];
2717     for (i=0; i<rend-rstart; i++) {
2718       row   = i + rstart;
2719       ncols = rptr[i+1] - rptr[i];
2720       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2721       vals += ncols;
2722       cols += ncols;
2723     }
2724   }
2725   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2726   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2727 
2728   if (reuse == MAT_INITIAL_MATRIX) {
2729     *matredundant = C;
2730 
2731     /* create a supporting struct and attach it to C for reuse */
2732     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2733     if (subsize == 1) {
2734       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2735       c->redundant = redund;
2736     } else {
2737       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2738       c->redundant = redund;
2739     }
2740 
2741     redund->nzlocal   = nzlocal;
2742     redund->nsends    = nsends;
2743     redund->nrecvs    = nrecvs;
2744     redund->send_rank = send_rank;
2745     redund->recv_rank = recv_rank;
2746     redund->sbuf_nz   = sbuf_nz;
2747     redund->rbuf_nz   = rbuf_nz;
2748     redund->sbuf_j    = sbuf_j;
2749     redund->sbuf_a    = sbuf_a;
2750     redund->rbuf_j    = rbuf_j;
2751     redund->rbuf_a    = rbuf_a;
2752     redund->psubcomm  = NULL;
2753 
2754     redund->Destroy = C->ops->destroy;
2755     C->ops->destroy = MatDestroy_MatRedundant;
2756   }
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2762 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2763 {
2764   PetscErrorCode ierr;
2765   MPI_Comm       comm;
2766   PetscMPIInt    size,subsize;
2767   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2768   Mat_Redundant  *redund=NULL;
2769   PetscSubcomm   psubcomm=NULL;
2770   MPI_Comm       subcomm_in=subcomm;
2771   Mat            *matseq;
2772   IS             isrow,iscol;
2773 
2774   PetscFunctionBegin;
2775   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2776     if (reuse ==  MAT_INITIAL_MATRIX) {
2777       /* create psubcomm, then get subcomm */
2778       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2779       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2780       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2781 
2782       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2783       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2784       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2785       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2786       subcomm = psubcomm->comm;
2787     } else { /* retrieve psubcomm and subcomm */
2788       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2789       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2790       if (subsize == 1) {
2791         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2792         redund = c->redundant;
2793       } else {
2794         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2795         redund = c->redundant;
2796       }
2797       psubcomm = redund->psubcomm;
2798     }
2799     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2800       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2801       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2802         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2803         if (subsize == 1) {
2804           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2805           c->redundant->psubcomm = psubcomm;
2806         } else {
2807           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2808           c->redundant->psubcomm = psubcomm;
2809         }
2810       }
2811       PetscFunctionReturn(0);
2812     }
2813   }
2814 
2815   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2816   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2817   if (reuse == MAT_INITIAL_MATRIX) {
2818     /* create a local sequential matrix matseq[0] */
2819     mloc_sub = PETSC_DECIDE;
2820     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2821     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2822     rstart = rend - mloc_sub;
2823     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2824     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2825   } else { /* reuse == MAT_REUSE_MATRIX */
2826     if (subsize == 1) {
2827       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2828       redund = c->redundant;
2829     } else {
2830       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2831       redund = c->redundant;
2832     }
2833 
2834     isrow  = redund->isrow;
2835     iscol  = redund->iscol;
2836     matseq = redund->matseq;
2837   }
2838   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2839   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2840 
2841   if (reuse == MAT_INITIAL_MATRIX) {
2842     /* create a supporting struct and attach it to C for reuse */
2843     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2844     if (subsize == 1) {
2845       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2846       c->redundant = redund;
2847     } else {
2848       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2849       c->redundant = redund;
2850     }
2851     redund->isrow    = isrow;
2852     redund->iscol    = iscol;
2853     redund->matseq   = matseq;
2854     redund->psubcomm = psubcomm;
2855     redund->Destroy               = (*matredundant)->ops->destroy;
2856     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2857   }
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2863 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2864 {
2865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2866   PetscErrorCode ierr;
2867   PetscInt       i,*idxb = 0;
2868   PetscScalar    *va,*vb;
2869   Vec            vtmp;
2870 
2871   PetscFunctionBegin;
2872   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2874   if (idx) {
2875     for (i=0; i<A->rmap->n; i++) {
2876       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2877     }
2878   }
2879 
2880   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2881   if (idx) {
2882     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2883   }
2884   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2885   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2886 
2887   for (i=0; i<A->rmap->n; i++) {
2888     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2889       va[i] = vb[i];
2890       if (idx) idx[i] = a->garray[idxb[i]];
2891     }
2892   }
2893 
2894   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2895   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2896   ierr = PetscFree(idxb);CHKERRQ(ierr);
2897   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2903 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2904 {
2905   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2906   PetscErrorCode ierr;
2907   PetscInt       i,*idxb = 0;
2908   PetscScalar    *va,*vb;
2909   Vec            vtmp;
2910 
2911   PetscFunctionBegin;
2912   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2913   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2914   if (idx) {
2915     for (i=0; i<A->rmap->n; i++) {
2916       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2917     }
2918   }
2919 
2920   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2921   if (idx) {
2922     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2923   }
2924   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2925   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2926 
2927   for (i=0; i<A->rmap->n; i++) {
2928     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2929       va[i] = vb[i];
2930       if (idx) idx[i] = a->garray[idxb[i]];
2931     }
2932   }
2933 
2934   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2935   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2936   ierr = PetscFree(idxb);CHKERRQ(ierr);
2937   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2938   PetscFunctionReturn(0);
2939 }
2940 
2941 #undef __FUNCT__
2942 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2943 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2944 {
2945   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2946   PetscInt       n      = A->rmap->n;
2947   PetscInt       cstart = A->cmap->rstart;
2948   PetscInt       *cmap  = mat->garray;
2949   PetscInt       *diagIdx, *offdiagIdx;
2950   Vec            diagV, offdiagV;
2951   PetscScalar    *a, *diagA, *offdiagA;
2952   PetscInt       r;
2953   PetscErrorCode ierr;
2954 
2955   PetscFunctionBegin;
2956   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2957   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2958   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2959   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2960   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2961   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2962   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2963   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2964   for (r = 0; r < n; ++r) {
2965     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2966       a[r]   = diagA[r];
2967       idx[r] = cstart + diagIdx[r];
2968     } else {
2969       a[r]   = offdiagA[r];
2970       idx[r] = cmap[offdiagIdx[r]];
2971     }
2972   }
2973   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2974   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2975   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2976   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2977   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2978   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 #undef __FUNCT__
2983 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2984 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2985 {
2986   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2987   PetscInt       n      = A->rmap->n;
2988   PetscInt       cstart = A->cmap->rstart;
2989   PetscInt       *cmap  = mat->garray;
2990   PetscInt       *diagIdx, *offdiagIdx;
2991   Vec            diagV, offdiagV;
2992   PetscScalar    *a, *diagA, *offdiagA;
2993   PetscInt       r;
2994   PetscErrorCode ierr;
2995 
2996   PetscFunctionBegin;
2997   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2998   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2999   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3000   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3001   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3002   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3003   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3004   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3005   for (r = 0; r < n; ++r) {
3006     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3007       a[r]   = diagA[r];
3008       idx[r] = cstart + diagIdx[r];
3009     } else {
3010       a[r]   = offdiagA[r];
3011       idx[r] = cmap[offdiagIdx[r]];
3012     }
3013   }
3014   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3015   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3016   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3017   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3018   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3019   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 #undef __FUNCT__
3024 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3025 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3026 {
3027   PetscErrorCode ierr;
3028   Mat            *dummy;
3029 
3030   PetscFunctionBegin;
3031   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3032   *newmat = *dummy;
3033   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3039 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3040 {
3041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3042   PetscErrorCode ierr;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3051 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3052 {
3053   PetscErrorCode ierr;
3054   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3055 
3056   PetscFunctionBegin;
3057   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3058   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3059   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   PetscFunctionReturn(0);
3062 }
3063 
3064 /* -------------------------------------------------------------------*/
3065 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3066                                        MatGetRow_MPIAIJ,
3067                                        MatRestoreRow_MPIAIJ,
3068                                        MatMult_MPIAIJ,
3069                                 /* 4*/ MatMultAdd_MPIAIJ,
3070                                        MatMultTranspose_MPIAIJ,
3071                                        MatMultTransposeAdd_MPIAIJ,
3072 #if defined(PETSC_HAVE_PBGL)
3073                                        MatSolve_MPIAIJ,
3074 #else
3075                                        0,
3076 #endif
3077                                        0,
3078                                        0,
3079                                 /*10*/ 0,
3080                                        0,
3081                                        0,
3082                                        MatSOR_MPIAIJ,
3083                                        MatTranspose_MPIAIJ,
3084                                 /*15*/ MatGetInfo_MPIAIJ,
3085                                        MatEqual_MPIAIJ,
3086                                        MatGetDiagonal_MPIAIJ,
3087                                        MatDiagonalScale_MPIAIJ,
3088                                        MatNorm_MPIAIJ,
3089                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3090                                        MatAssemblyEnd_MPIAIJ,
3091                                        MatSetOption_MPIAIJ,
3092                                        MatZeroEntries_MPIAIJ,
3093                                 /*24*/ MatZeroRows_MPIAIJ,
3094                                        0,
3095 #if defined(PETSC_HAVE_PBGL)
3096                                        0,
3097 #else
3098                                        0,
3099 #endif
3100                                        0,
3101                                        0,
3102                                 /*29*/ MatSetUp_MPIAIJ,
3103 #if defined(PETSC_HAVE_PBGL)
3104                                        0,
3105 #else
3106                                        0,
3107 #endif
3108                                        0,
3109                                        0,
3110                                        0,
3111                                 /*34*/ MatDuplicate_MPIAIJ,
3112                                        0,
3113                                        0,
3114                                        0,
3115                                        0,
3116                                 /*39*/ MatAXPY_MPIAIJ,
3117                                        MatGetSubMatrices_MPIAIJ,
3118                                        MatIncreaseOverlap_MPIAIJ,
3119                                        MatGetValues_MPIAIJ,
3120                                        MatCopy_MPIAIJ,
3121                                 /*44*/ MatGetRowMax_MPIAIJ,
3122                                        MatScale_MPIAIJ,
3123                                        0,
3124                                        0,
3125                                        MatZeroRowsColumns_MPIAIJ,
3126                                 /*49*/ MatSetRandom_MPIAIJ,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                        0,
3131                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3132                                        0,
3133                                        MatSetUnfactored_MPIAIJ,
3134                                        MatPermute_MPIAIJ,
3135                                        0,
3136                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3137                                        MatDestroy_MPIAIJ,
3138                                        MatView_MPIAIJ,
3139                                        0,
3140                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3141                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3142                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3143                                        0,
3144                                        0,
3145                                        0,
3146                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3147                                        MatGetRowMinAbs_MPIAIJ,
3148                                        0,
3149                                        MatSetColoring_MPIAIJ,
3150                                        0,
3151                                        MatSetValuesAdifor_MPIAIJ,
3152                                 /*75*/ MatFDColoringApply_AIJ,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                        MatFindZeroDiagonals_MPIAIJ,
3157                                 /*80*/ 0,
3158                                        0,
3159                                        0,
3160                                 /*83*/ MatLoad_MPIAIJ,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                        0,
3165                                        0,
3166                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3167                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3168                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3169                                        MatPtAP_MPIAIJ_MPIAIJ,
3170                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3171                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                        0,
3176                                 /*99*/ 0,
3177                                        0,
3178                                        0,
3179                                        MatConjugate_MPIAIJ,
3180                                        0,
3181                                 /*104*/MatSetValuesRow_MPIAIJ,
3182                                        MatRealPart_MPIAIJ,
3183                                        MatImaginaryPart_MPIAIJ,
3184                                        0,
3185                                        0,
3186                                 /*109*/0,
3187                                        MatGetRedundantMatrix_MPIAIJ,
3188                                        MatGetRowMin_MPIAIJ,
3189                                        0,
3190                                        0,
3191                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                 /*119*/0,
3197                                        0,
3198                                        0,
3199                                        0,
3200                                        MatGetMultiProcBlock_MPIAIJ,
3201                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3202                                        MatGetColumnNorms_MPIAIJ,
3203                                        MatInvertBlockDiagonal_MPIAIJ,
3204                                        0,
3205                                        MatGetSubMatricesParallel_MPIAIJ,
3206                                 /*129*/0,
3207                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3208                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3209                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3210                                        0,
3211                                 /*134*/0,
3212                                        0,
3213                                        0,
3214                                        0,
3215                                        0,
3216                                 /*139*/0,
3217                                        0,
3218                                        0,
3219                                        MatFDColoringSetUp_MPIXAIJ
3220 };
3221 
3222 /* ----------------------------------------------------------------------------------------*/
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3226 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3227 {
3228   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3229   PetscErrorCode ierr;
3230 
3231   PetscFunctionBegin;
3232   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3233   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3239 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3240 {
3241   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3242   PetscErrorCode ierr;
3243 
3244   PetscFunctionBegin;
3245   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3246   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 #undef __FUNCT__
3251 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3252 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3253 {
3254   Mat_MPIAIJ     *b;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3259   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3260   b = (Mat_MPIAIJ*)B->data;
3261 
3262   if (!B->preallocated) {
3263     /* Explicitly create 2 MATSEQAIJ matrices. */
3264     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3265     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3266     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3267     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3268     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3269     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3270     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3271     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3272     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3273     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3274   }
3275 
3276   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3277   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3278   B->preallocated = PETSC_TRUE;
3279   PetscFunctionReturn(0);
3280 }
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3284 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3285 {
3286   Mat            mat;
3287   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3288   PetscErrorCode ierr;
3289 
3290   PetscFunctionBegin;
3291   *newmat = 0;
3292   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3293   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3294   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3295   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3296   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3297   a       = (Mat_MPIAIJ*)mat->data;
3298 
3299   mat->factortype   = matin->factortype;
3300   mat->assembled    = PETSC_TRUE;
3301   mat->insertmode   = NOT_SET_VALUES;
3302   mat->preallocated = PETSC_TRUE;
3303 
3304   a->size         = oldmat->size;
3305   a->rank         = oldmat->rank;
3306   a->donotstash   = oldmat->donotstash;
3307   a->roworiented  = oldmat->roworiented;
3308   a->rowindices   = 0;
3309   a->rowvalues    = 0;
3310   a->getrowactive = PETSC_FALSE;
3311 
3312   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3313   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3314 
3315   if (oldmat->colmap) {
3316 #if defined(PETSC_USE_CTABLE)
3317     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3318 #else
3319     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3320     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3321     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3322 #endif
3323   } else a->colmap = 0;
3324   if (oldmat->garray) {
3325     PetscInt len;
3326     len  = oldmat->B->cmap->n;
3327     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3328     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3329     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3330   } else a->garray = 0;
3331 
3332   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3333   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3334   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3335   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3336   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3337   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3338   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3339   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3340   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3341   *newmat = mat;
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 
3346 
3347 #undef __FUNCT__
3348 #define __FUNCT__ "MatLoad_MPIAIJ"
3349 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3350 {
3351   PetscScalar    *vals,*svals;
3352   MPI_Comm       comm;
3353   PetscErrorCode ierr;
3354   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3355   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3356   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3357   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3358   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3359   int            fd;
3360   PetscInt       bs = 1;
3361 
3362   PetscFunctionBegin;
3363   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3364   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3365   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3366   if (!rank) {
3367     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3368     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3369     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3370   }
3371 
3372   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3373   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3374   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3375 
3376   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3377 
3378   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3379   M    = header[1]; N = header[2];
3380   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3381   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3382   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3383 
3384   /* If global sizes are set, check if they are consistent with that given in the file */
3385   if (sizesset) {
3386     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3387   }
3388   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3389   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3390 
3391   /* determine ownership of all (block) rows */
3392   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3393   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3394   else m = newMat->rmap->n; /* Set by user */
3395 
3396   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3397   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3398 
3399   /* First process needs enough room for process with most rows */
3400   if (!rank) {
3401     mmax = rowners[1];
3402     for (i=2; i<=size; i++) {
3403       mmax = PetscMax(mmax, rowners[i]);
3404     }
3405   } else mmax = -1;             /* unused, but compilers complain */
3406 
3407   rowners[0] = 0;
3408   for (i=2; i<=size; i++) {
3409     rowners[i] += rowners[i-1];
3410   }
3411   rstart = rowners[rank];
3412   rend   = rowners[rank+1];
3413 
3414   /* distribute row lengths to all processors */
3415   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3416   if (!rank) {
3417     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3418     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3419     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3420     for (j=0; j<m; j++) {
3421       procsnz[0] += ourlens[j];
3422     }
3423     for (i=1; i<size; i++) {
3424       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3425       /* calculate the number of nonzeros on each processor */
3426       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3427         procsnz[i] += rowlengths[j];
3428       }
3429       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3430     }
3431     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3432   } else {
3433     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3434   }
3435 
3436   if (!rank) {
3437     /* determine max buffer needed and allocate it */
3438     maxnz = 0;
3439     for (i=0; i<size; i++) {
3440       maxnz = PetscMax(maxnz,procsnz[i]);
3441     }
3442     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3443 
3444     /* read in my part of the matrix column indices  */
3445     nz   = procsnz[0];
3446     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3447     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3448 
3449     /* read in everyone else's and ship off */
3450     for (i=1; i<size; i++) {
3451       nz   = procsnz[i];
3452       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3453       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3454     }
3455     ierr = PetscFree(cols);CHKERRQ(ierr);
3456   } else {
3457     /* determine buffer space needed for message */
3458     nz = 0;
3459     for (i=0; i<m; i++) {
3460       nz += ourlens[i];
3461     }
3462     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3463 
3464     /* receive message of column indices*/
3465     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3466   }
3467 
3468   /* determine column ownership if matrix is not square */
3469   if (N != M) {
3470     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3471     else n = newMat->cmap->n;
3472     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3473     cstart = cend - n;
3474   } else {
3475     cstart = rstart;
3476     cend   = rend;
3477     n      = cend - cstart;
3478   }
3479 
3480   /* loop over local rows, determining number of off diagonal entries */
3481   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3482   jj   = 0;
3483   for (i=0; i<m; i++) {
3484     for (j=0; j<ourlens[i]; j++) {
3485       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3486       jj++;
3487     }
3488   }
3489 
3490   for (i=0; i<m; i++) {
3491     ourlens[i] -= offlens[i];
3492   }
3493   if (!sizesset) {
3494     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3495   }
3496 
3497   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3498 
3499   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3500 
3501   for (i=0; i<m; i++) {
3502     ourlens[i] += offlens[i];
3503   }
3504 
3505   if (!rank) {
3506     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3507 
3508     /* read in my part of the matrix numerical values  */
3509     nz   = procsnz[0];
3510     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3511 
3512     /* insert into matrix */
3513     jj      = rstart;
3514     smycols = mycols;
3515     svals   = vals;
3516     for (i=0; i<m; i++) {
3517       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3518       smycols += ourlens[i];
3519       svals   += ourlens[i];
3520       jj++;
3521     }
3522 
3523     /* read in other processors and ship out */
3524     for (i=1; i<size; i++) {
3525       nz   = procsnz[i];
3526       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3527       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3528     }
3529     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3530   } else {
3531     /* receive numeric values */
3532     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3533 
3534     /* receive message of values*/
3535     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3536 
3537     /* insert into matrix */
3538     jj      = rstart;
3539     smycols = mycols;
3540     svals   = vals;
3541     for (i=0; i<m; i++) {
3542       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3543       smycols += ourlens[i];
3544       svals   += ourlens[i];
3545       jj++;
3546     }
3547   }
3548   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3549   ierr = PetscFree(vals);CHKERRQ(ierr);
3550   ierr = PetscFree(mycols);CHKERRQ(ierr);
3551   ierr = PetscFree(rowners);CHKERRQ(ierr);
3552   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3554   PetscFunctionReturn(0);
3555 }
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3559 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3560 {
3561   PetscErrorCode ierr;
3562   IS             iscol_local;
3563   PetscInt       csize;
3564 
3565   PetscFunctionBegin;
3566   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3567   if (call == MAT_REUSE_MATRIX) {
3568     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3569     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570   } else {
3571     PetscInt cbs;
3572     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3573     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3574     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3575   }
3576   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3577   if (call == MAT_INITIAL_MATRIX) {
3578     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3579     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3580   }
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3587 /*
3588     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
3589   then the final result obtained by concatenating the local matrices.
3590   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3591 
3592   Note: This requires a sequential iscol with all indices.
3593 */
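/*
   Illustrative sketch only (not part of the implementation in this file): a caller satisfies
   the "sequential iscol with all indices" requirement by gathering a parallel column IS first,
   much as MatGetSubMatrix_MPIAIJ() above does.  Assuming mat, isrow, iscol, csize, call and
   newmat are the caller's variables, the pattern is roughly

       IS iscol_local;
       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);

   where ISAllGather() places every column index of iscol on each process.
*/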
3594 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3595 {
3596   PetscErrorCode ierr;
3597   PetscMPIInt    rank,size;
3598   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3599   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3600   PetscBool      allcolumns, colflag;
3601   Mat            M,Mreuse;
3602   MatScalar      *vwork,*aa;
3603   MPI_Comm       comm;
3604   Mat_SeqAIJ     *aij;
3605 
3606   PetscFunctionBegin;
3607   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3609   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3610 
3611   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3612   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3613   if (colflag && ncol == mat->cmap->N) {
3614     allcolumns = PETSC_TRUE;
3615   } else {
3616     allcolumns = PETSC_FALSE;
3617   }
3618   if (call ==  MAT_REUSE_MATRIX) {
3619     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3620     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3621     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3622   } else {
3623     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3624   }
3625 
3626   /*
3627       m - number of local rows
3628       n - number of columns (same on all processors)
3629       rstart - first row in new global matrix generated
3630   */
3631   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3632   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3633   if (call == MAT_INITIAL_MATRIX) {
3634     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3635     ii  = aij->i;
3636     jj  = aij->j;
3637 
3638     /*
3639         Determine the number of non-zeros in the diagonal and off-diagonal
3640         portions of the matrix in order to do correct preallocation
3641     */
3642 
3643     /* first get start and end of "diagonal" columns */
3644     if (csize == PETSC_DECIDE) {
3645       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3646       if (mglobal == n) { /* square matrix */
3647         nlocal = m;
3648       } else {
3649         nlocal = n/size + ((n % size) > rank);
3650       }
3651     } else {
3652       nlocal = csize;
3653     }
3654     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3655     rstart = rend - nlocal;
3656     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3657 
3658     /* next, compute all the lengths */
3659     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3660     olens = dlens + m;
3661     for (i=0; i<m; i++) {
3662       jend = ii[i+1] - ii[i];
3663       olen = 0;
3664       dlen = 0;
3665       for (j=0; j<jend; j++) {
3666         if (*jj < rstart || *jj >= rend) olen++;
3667         else dlen++;
3668         jj++;
3669       }
3670       olens[i] = olen;
3671       dlens[i] = dlen;
3672     }
3673     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3674     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3675     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3676     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3677     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3678     ierr = PetscFree(dlens);CHKERRQ(ierr);
3679   } else {
3680     PetscInt ml,nl;
3681 
3682     M    = *newmat;
3683     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3684     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3685     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3686     /*
3687          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688        rather than the slower MatSetValues().
3689     */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3694   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3695   ii   = aij->i;
3696   jj   = aij->j;
3697   aa   = aij->a;
3698   for (i=0; i<m; i++) {
3699     row   = rstart + i;
3700     nz    = ii[i+1] - ii[i];
3701     cwork = jj;     jj += nz;
3702     vwork = aa;     aa += nz;
3703     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3704   }
3705 
3706   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   *newmat = M;
3709 
3710   /* save submatrix used in processor for next request */
3711   if (call ==  MAT_INITIAL_MATRIX) {
3712     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3713     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3714   }
3715   PetscFunctionReturn(0);
3716 }
3717 
3718 #undef __FUNCT__
3719 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3720 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3721 {
3722   PetscInt       m,cstart, cend,j,nnz,i,d;
3723   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3724   const PetscInt *JJ;
3725   PetscScalar    *values;
3726   PetscErrorCode ierr;
3727 
3728   PetscFunctionBegin;
3729   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3730 
3731   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3732   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3733   m      = B->rmap->n;
3734   cstart = B->cmap->rstart;
3735   cend   = B->cmap->rend;
3736   rstart = B->rmap->rstart;
3737 
3738   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3739 
3740 #if defined(PETSC_USE_DEBUG)
3741   for (i=0; i<m; i++) {
3742     nnz = Ii[i+1]- Ii[i];
3743     JJ  = J + Ii[i];
3744     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3745     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3746     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3747   }
3748 #endif
3749 
3750   for (i=0; i<m; i++) {
3751     nnz     = Ii[i+1]- Ii[i];
3752     JJ      = J + Ii[i];
3753     nnz_max = PetscMax(nnz_max,nnz);
3754     d       = 0;
3755     for (j=0; j<nnz; j++) {
3756       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3757     }
3758     d_nnz[i] = d;
3759     o_nnz[i] = nnz - d;
3760   }
3761   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3762   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3763 
3764   if (v) values = (PetscScalar*)v;
3765   else {
3766     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3767   }
3768 
3769   for (i=0; i<m; i++) {
3770     ii   = i + rstart;
3771     nnz  = Ii[i+1]- Ii[i];
3772     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3773   }
3774   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776 
3777   if (!v) {
3778     ierr = PetscFree(values);CHKERRQ(ierr);
3779   }
3780   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3786 /*@
3787    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3788    (the default parallel PETSc format).
3789 
3790    Collective on MPI_Comm
3791 
3792    Input Parameters:
3793 +  B - the matrix
3794 .  i - the indices into j for the start of each local row (starts with zero)
3795 .  j - the column indices for each local row (starts with zero)
3796 -  v - optional values in the matrix
3797 
3798    Level: developer
3799 
3800    Notes:
3801        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3802      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3803      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3804 
3805        The i and j indices are 0 based; the entries of i are offsets into the local j array.
3806 
3807        The format used for the sparse matrix input is equivalent to a
3808     row-major ordering, i.e. for the following matrix, the expected input data is
3809     as shown:
3810 
3811         1 0 0
3812         2 0 3     P0
3813        -------
3814         4 5 6     P1
3815 
3816      Process0 [P0]: rows_owned=[0,1]
3817         i =  {0,1,3}  [size = nrow+1  = 2+1]
3818         j =  {0,0,2}  [size = nz = 3]
3819         v =  {1,2,3}  [size = nz = 3]
3820 
3821      Process1 [P1]: rows_owned=[2]
3822         i =  {0,3}    [size = nrow+1  = 1+1]
3823         j =  {0,1,2}  [size = nz = 3]
3824         v =  {4,5,6}  [size = nz = 3]
3825 
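     As an illustration only (assuming a parallel matrix A of type MATMPIAIJ has already been
     created with the layout above, and ierr is the caller's error code), P0 would pass its
     local CSR data, mirroring the i, j, v listed for Process0, as

.vb
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1.0,2.0,3.0};
        ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
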
3826 .keywords: matrix, aij, compressed row, sparse, parallel
3827 
3828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3829           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3830 @*/
3831 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3832 {
3833   PetscErrorCode ierr;
3834 
3835   PetscFunctionBegin;
3836   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3842 /*@C
3843    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3844    (the default parallel PETSc format).  For good matrix assembly performance
3845    the user should preallocate the matrix storage by setting the parameters
3846    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3847    performance can be increased by more than a factor of 50.
3848 
3849    Collective on MPI_Comm
3850 
3851    Input Parameters:
3852 +  A - the matrix
3853 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3854            (same value is used for all local rows)
3855 .  d_nnz - array containing the number of nonzeros in the various rows of the
3856            DIAGONAL portion of the local submatrix (possibly different for each row)
3857            or NULL, if d_nz is used to specify the nonzero structure.
3858            The size of this array is equal to the number of local rows, i.e. 'm'.
3859            For matrices that will be factored, you must leave room for (and set)
3860            the diagonal entry even if it is zero.
3861 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3862            submatrix (same value is used for all local rows).
3863 -  o_nnz - array containing the number of nonzeros in the various rows of the
3864            OFF-DIAGONAL portion of the local submatrix (possibly different for
3865            each row) or NULL, if o_nz is used to specify the nonzero
3866            structure. The size of this array is equal to the number
3867            of local rows, i.e. 'm'.
3868 
3869    If the *_nnz parameter is given then the *_nz parameter is ignored
3870 
3871    The AIJ format (also called the Yale sparse matrix format or
3872    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3873    storage.  The stored row and column indices begin with zero.
3874    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3875 
3876    The parallel matrix is partitioned such that the first m0 rows belong to
3877    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3878    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3879 
3880    The DIAGONAL portion of the local submatrix of a processor can be defined
3881    as the submatrix which is obtained by extracting the part corresponding to
3882    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3883    first row that belongs to the processor, r2 is the last row belonging to
3884    this processor, and c1-c2 is the range of indices of the local part of a
3885    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3886    common case of a square matrix, the row and column ranges are the same and
3887    the DIAGONAL part is also square. The remaining portion of the local
3888    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3889 
3890    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3891 
3892    You can call MatGetInfo() to get information on how effective the preallocation was;
3893    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3894    You can also run with the option -info and look for messages containing the string
3895    malloc to see if additional memory allocation was needed.
3896 
3897    Example usage:
3898 
3899    Consider the following 8x8 matrix with 34 non-zero values that is
3900    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3901    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3902    as follows:
3903 
3904 .vb
3905             1  2  0  |  0  3  0  |  0  4
3906     Proc0   0  5  6  |  7  0  0  |  8  0
3907             9  0 10  | 11  0  0  | 12  0
3908     -------------------------------------
3909            13  0 14  | 15 16 17  |  0  0
3910     Proc1   0 18  0  | 19 20 21  |  0  0
3911             0  0  0  | 22 23  0  | 24  0
3912     -------------------------------------
3913     Proc2  25 26 27  |  0  0 28  | 29  0
3914            30  0  0  | 31 32 33  |  0 34
3915 .ve
3916 
3917    This can be represented as a collection of submatrices as:
3918 
3919 .vb
3920       A B C
3921       D E F
3922       G H I
3923 .ve
3924 
3925    Where the submatrices A,B,C are owned by proc0, D,E,F are
3926    owned by proc1, G,H,I are owned by proc2.
3927 
3928    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3929    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3930    The 'M','N' parameters are 8,8, and have the same values on all procs.
3931 
3932    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3933    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3934    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3935    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3936    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3937    matrix, and [DF] as another SeqAIJ matrix.
3938 
3939    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3940    allocated for every row of the local diagonal submatrix, and o_nz
3941    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3942    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3943    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3944    In this case, the values of d_nz,o_nz are:
3945 .vb
3946      proc0 : d_nz = 2, o_nz = 2
3947      proc1 : d_nz = 3, o_nz = 2
3948      proc2 : d_nz = 1, o_nz = 4
3949 .ve
3950    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3951    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3952    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3953    34 values.
3954 
3955    When the d_nnz, o_nnz parameters are specified, the storage is specified
3956    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3957    In the above case the values for d_nnz,o_nnz are:
3958 .vb
3959      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3960      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3961      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3962 .ve
3963    Here the space allocated is the sum of all the above values, i.e. 34, and
3964    hence the preallocation is perfect.
3965 
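   As a concrete, minimal sketch (hypothetical code for proc0 of the example above;
   MatSetValues() calls, assembly, and error checking are omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
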
3966    Level: intermediate
3967 
3968 .keywords: matrix, aij, compressed row, sparse, parallel
3969 
3970 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3971           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3972 @*/
3973 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3974 {
3975   PetscErrorCode ierr;
3976 
3977   PetscFunctionBegin;
3978   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3979   PetscValidType(B,1);
3980   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
3983 
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3986 /*@
3987      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3988          rows in standard CSR format.
3989 
3990    Collective on MPI_Comm
3991 
3992    Input Parameters:
3993 +  comm - MPI communicator
3994 .  m - number of local rows (Cannot be PETSC_DECIDE)
3995 .  n - This value should be the same as the local size used in creating the
3996        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3997        it calculated if N is given). For square matrices n is almost always m.
3998 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3999 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4000 .   i - row indices
4001 .   j - column indices
4002 -   a - matrix values
4003 
4004    Output Parameter:
4005 .   mat - the matrix
4006 
4007    Level: intermediate
4008 
4009    Notes:
4010        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4011      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4012      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4013 
4014        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4015 
4016        The format used for the sparse matrix input is equivalent to a
4017     row-major ordering, i.e. for the following matrix, the input data expected is
4018     as shown below:
4019 
4020         1 0 0
4021         2 0 3     P0
4022        -------
4023         4 5 6     P1
4024 
4025      Process0 [P0]: rows_owned=[0,1]
4026         i =  {0,1,3}  [size = nrow+1  = 2+1]
4027         j =  {0,0,2}  [size = nz = 3]
4028         v =  {1,2,3}  [size = nz = 3]
4029 
4030      Process1 [P1]: rows_owned=[2]
4031         i =  {0,3}    [size = nrow+1  = 1+1]
4032         j =  {0,1,2}  [size = nz = 3]
4033         v =  {4,5,6}  [size = nz = 3]
4034 
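     A minimal calling sketch (m, n and the local i, j, a arrays are assumptions here, presumed
     to have been filled by the caller as in the example above):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
     /* ... use A ... */
     MatDestroy(&A);
.ve
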
4035 .keywords: matrix, aij, compressed row, sparse, parallel
4036 
4037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4038           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4039 @*/
4040 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4041 {
4042   PetscErrorCode ierr;
4043 
4044   PetscFunctionBegin;
4045   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4046   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4047   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4048   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4049   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4050   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4051   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4052   PetscFunctionReturn(0);
4053 }
4054 
4055 #undef __FUNCT__
4056 #define __FUNCT__ "MatCreateAIJ"
4057 /*@C
4058    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4059    (the default parallel PETSc format).  For good matrix assembly performance
4060    the user should preallocate the matrix storage by setting the parameters
4061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4062    performance can be increased by more than a factor of 50.
4063 
4064    Collective on MPI_Comm
4065 
4066    Input Parameters:
4067 +  comm - MPI communicator
4068 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4069            This value should be the same as the local size used in creating the
4070            y vector for the matrix-vector product y = Ax.
4071 .  n - This value should be the same as the local size used in creating the
4072        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4073        it calculated if N is given). For square matrices n is almost always m.
4074 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4075 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4076 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4077            (same value is used for all local rows)
4078 .  d_nnz - array containing the number of nonzeros in the various rows of the
4079            DIAGONAL portion of the local submatrix (possibly different for each row)
4080            or NULL, if d_nz is used to specify the nonzero structure.
4081            The size of this array is equal to the number of local rows, i.e. 'm'.
4082 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4083            submatrix (same value is used for all local rows).
4084 -  o_nnz - array containing the number of nonzeros in the various rows of the
4085            OFF-DIAGONAL portion of the local submatrix (possibly different for
4086            each row) or NULL, if o_nz is used to specify the nonzero
4087            structure. The size of this array is equal to the number
4088            of local rows, i.e. 'm'.
4089 
4090    Output Parameter:
4091 .  A - the matrix
4092 
4093    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4094    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4095    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4096 
4097    Notes:
4098    If the *_nnz parameter is given then the *_nz parameter is ignored
4099 
4100    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4101    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4102    storage requirements for this matrix.
4103 
4104    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4105    processor then it must be used on all processors that share the object for
4106    that argument.
4107 
4108    The user MUST specify either the local or global matrix dimensions
4109    (possibly both).
4110 
4111    The parallel matrix is partitioned across processors such that the
4112    first m0 rows belong to process 0, the next m1 rows belong to
4113    process 1, the next m2 rows belong to process 2, etc., where
4114    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4115    values corresponding to an [m x N] submatrix.
4116 
4117    The columns are logically partitioned with the n0 columns belonging
4118    to the 0th partition, the next n1 columns belonging to the next
4119    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4120 
4121    The DIAGONAL portion of the local submatrix on any given processor
4122    is the submatrix corresponding to the rows and columns m,n
4123    owned by the given processor, i.e. the diagonal matrix on
4124    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4125    etc. The remaining portion of the local submatrix [m x (N-n)]
4126    constitutes the OFF-DIAGONAL portion. The example below better
4127    illustrates this concept.
4128 
4129    For a square global matrix we define each processor's diagonal portion
4130    to be its local rows and the corresponding columns (a square submatrix);
4131    each processor's off-diagonal portion encompasses the remainder of the
4132    local matrix (a rectangular submatrix).
4133 
4134    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4135 
4136    When calling this routine with a single process communicator, a matrix of
4137    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4138    type of communicator, use the construction mechanism:
4139      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4140 
4141    By default, this format uses inodes (identical nodes) when possible.
4142    We search for consecutive rows with the same nonzero structure, thereby
4143    reusing matrix information to achieve increased efficiency.
4144 
4145    Options Database Keys:
4146 +  -mat_no_inode  - Do not use inodes
4147 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4148 -  -mat_aij_oneindex - Internally use indexing starting at 1
4149         rather than 0.  Note that when calling MatSetValues(),
4150         the user still MUST index entries starting at 0!
4151 
4152 
4153    Example usage:
4154 
4155    Consider the following 8x8 matrix with 34 non-zero values that is
4156    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4157    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4158    as follows:
4159 
4160 .vb
4161             1  2  0  |  0  3  0  |  0  4
4162     Proc0   0  5  6  |  7  0  0  |  8  0
4163             9  0 10  | 11  0  0  | 12  0
4164     -------------------------------------
4165            13  0 14  | 15 16 17  |  0  0
4166     Proc1   0 18  0  | 19 20 21  |  0  0
4167             0  0  0  | 22 23  0  | 24  0
4168     -------------------------------------
4169     Proc2  25 26 27  |  0  0 28  | 29  0
4170            30  0  0  | 31 32 33  |  0 34
4171 .ve
4172 
4173    This can be represented as a collection of submatrices as:
4174 
4175 .vb
4176       A B C
4177       D E F
4178       G H I
4179 .ve
4180 
4181    Where the submatrices A,B,C are owned by proc0, D,E,F are
4182    owned by proc1, G,H,I are owned by proc2.
4183 
4184    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4185    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4186    The 'M','N' parameters are 8,8, and have the same values on all procs.
4187 
4188    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4189    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4190    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4191    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4192    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4193    matrix, and [DF] as another SeqAIJ matrix.
4194 
4195    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4196    allocated for every row of the local diagonal submatrix, and o_nz
4197    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4198    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4199    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4200    In this case, the values of d_nz,o_nz are:
4201 .vb
4202      proc0 : d_nz = 2, o_nz = 2
4203      proc1 : d_nz = 3, o_nz = 2
4204      proc2 : d_nz = 1, o_nz = 4
4205 .ve
4206    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4207    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4208    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4209    34 values.
4210 
4211    When the d_nnz, o_nnz parameters are specified, the storage is specified
4212    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4213    In the above case the values for d_nnz,o_nnz are:
4214 .vb
4215      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4216      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4217      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4218 .ve
4219    Here the space allocated is the sum of all the above values, i.e. 34, and
4220    hence the preallocation is perfect.
4221 
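   A minimal sketch (hypothetical code for proc0 of the example above; MatSetValues() calls and
   assembly are omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
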
4222    Level: intermediate
4223 
4224 .keywords: matrix, aij, compressed row, sparse, parallel
4225 
4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4227           MPIAIJ, MatCreateMPIAIJWithArrays()
4228 @*/
4229 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4230 {
4231   PetscErrorCode ierr;
4232   PetscMPIInt    size;
4233 
4234   PetscFunctionBegin;
4235   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4236   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4238   if (size > 1) {
4239     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4240     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4241   } else {
4242     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4243     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4244   }
4245   PetscFunctionReturn(0);
4246 }
4247 
4248 #undef __FUNCT__
4249 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
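/* MatMPIAIJGetSeqAIJ - returns the diagonal block (Ad), the off-diagonal block (Ao), and the
   mapping (colmap) from local column indices of Ao to global column indices of the MPIAIJ matrix A */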
4250 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4251 {
4252   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4253 
4254   PetscFunctionBegin;
4255   *Ad     = a->A;
4256   *Ao     = a->B;
4257   *colmap = a->garray;
4258   PetscFunctionReturn(0);
4259 }
4260 
4261 #undef __FUNCT__
4262 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4263 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4264 {
4265   PetscErrorCode ierr;
4266   PetscInt       i;
4267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4268 
4269   PetscFunctionBegin;
4270   if (coloring->ctype == IS_COLORING_GLOBAL) {
4271     ISColoringValue *allcolors,*colors;
4272     ISColoring      ocoloring;
4273 
4274     /* set coloring for diagonal portion */
4275     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4276 
4277     /* set coloring for off-diagonal portion */
4278     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4279     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4280     for (i=0; i<a->B->cmap->n; i++) {
4281       colors[i] = allcolors[a->garray[i]];
4282     }
4283     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4284     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4285     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4286     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4287   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4288     ISColoringValue *colors;
4289     PetscInt        *larray;
4290     ISColoring      ocoloring;
4291 
4292     /* set coloring for diagonal portion */
4293     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4294     for (i=0; i<a->A->cmap->n; i++) {
4295       larray[i] = i + A->cmap->rstart;
4296     }
4297     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4298     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4299     for (i=0; i<a->A->cmap->n; i++) {
4300       colors[i] = coloring->colors[larray[i]];
4301     }
4302     ierr = PetscFree(larray);CHKERRQ(ierr);
4303     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4304     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4305     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4306 
4307     /* set coloring for off-diagonal portion */
4308     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4309     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4310     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4311     for (i=0; i<a->B->cmap->n; i++) {
4312       colors[i] = coloring->colors[larray[i]];
4313     }
4314     ierr = PetscFree(larray);CHKERRQ(ierr);
4315     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4316     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4317     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4318   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 #undef __FUNCT__
4323 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4324 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4325 {
4326   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4327   PetscErrorCode ierr;
4328 
4329   PetscFunctionBegin;
4330   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4331   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 #undef __FUNCT__
4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4337 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4338 {
4339   PetscErrorCode ierr;
4340   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4341   PetscInt       *indx;
4342 
4343   PetscFunctionBegin;
4344   /* This routine will ONLY return MPIAIJ type matrix */
4345   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4346   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4347   if (n == PETSC_DECIDE) {
4348     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4349   }
4350   /* Check sum(n) = N */
4351   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4352   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4353 
4354   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4355   rstart -= m;
4356 
4357   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4358   for (i=0; i<m; i++) {
4359     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4360     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4361     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4362   }
4363 
4364   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4365   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4366   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4367   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4368   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4369   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4375 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4376 {
4377   PetscErrorCode ierr;
4378   PetscInt       m,N,i,rstart,nnz,Ii;
4379   PetscInt       *indx;
4380   PetscScalar    *values;
4381 
4382   PetscFunctionBegin;
4383   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4384   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4385   for (i=0; i<m; i++) {
4386     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4387     Ii   = i + rstart;
4388     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4389     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4390   }
4391   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4392   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   PetscFunctionReturn(0);
4394 }
4395 
4396 #undef __FUNCT__
4397 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4398 /*@
4399       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4400                  matrices from each processor
4401 
4402     Collective on MPI_Comm
4403 
4404    Input Parameters:
4405 +    comm - the communicator the parallel matrix will live on
4406 .    inmat - the input sequential matrix on each process
4407 .    n - number of local columns (or PETSC_DECIDE)
4408 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4409 
4410    Output Parameter:
4411 .    outmat - the parallel matrix generated
4412 
4413     Level: advanced
4414 
4415    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4416 
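   A minimal calling sketch (assuming each process has built a sequential matrix inmat with the
   same number of columns):

.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
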
4417 @*/
4418 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4419 {
4420   PetscErrorCode ierr;
4421   PetscMPIInt    size;
4422 
4423   PetscFunctionBegin;
4424   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4425   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4426   if (size == 1) {
4427     if (scall == MAT_INITIAL_MATRIX) {
4428       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4429     } else {
4430       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4431     }
4432   } else {
4433     if (scall == MAT_INITIAL_MATRIX) {
4434       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4435     }
4436     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4437   }
4438   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #undef __FUNCT__
4443 #define __FUNCT__ "MatFileSplit"
4444 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4445 {
4446   PetscErrorCode    ierr;
4447   PetscMPIInt       rank;
4448   PetscInt          m,N,i,rstart,nnz;
4449   size_t            len;
4450   const PetscInt    *indx;
4451   PetscViewer       out;
4452   char              *name;
4453   Mat               B;
4454   const PetscScalar *values;
4455 
4456   PetscFunctionBegin;
4457   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4458   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4459   /* Should this be the type of the diagonal block of A? */
4460   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4461   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4462   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4463   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4464   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4465   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4466   for (i=0; i<m; i++) {
4467     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4468     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4469     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4470   }
4471   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473 
4474   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4475   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4476   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4477   sprintf(name,"%s.%d",outfile,rank);
4478   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4479   ierr = PetscFree(name);CHKERRQ(ierr);
4480   ierr = MatView(B,out);CHKERRQ(ierr);
4481   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4482   ierr = MatDestroy(&B);CHKERRQ(ierr);
4483   PetscFunctionReturn(0);
4484 }
4485 
4486 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4487 #undef __FUNCT__
4488 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4489 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4490 {
4491   PetscErrorCode      ierr;
4492   Mat_Merge_SeqsToMPI *merge;
4493   PetscContainer      container;
4494 
4495   PetscFunctionBegin;
4496   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4497   if (container) {
4498     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4511     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4512     ierr = PetscFree(merge);CHKERRQ(ierr);
4513     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4514   }
4515   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
4518 
4519 #include <../src/mat/utils/freespace.h>
4520 #include <petscbt.h>
4521 
4522 #undef __FUNCT__
4523 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4524 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4525 {
4526   PetscErrorCode      ierr;
4527   MPI_Comm            comm;
4528   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4529   PetscMPIInt         size,rank,taga,*len_s;
4530   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4531   PetscInt            proc,m;
4532   PetscInt            **buf_ri,**buf_rj;
4533   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4534   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4535   MPI_Request         *s_waits,*r_waits;
4536   MPI_Status          *status;
4537   MatScalar           *aa=a->a;
4538   MatScalar           **abuf_r,*ba_i;
4539   Mat_Merge_SeqsToMPI *merge;
4540   PetscContainer      container;
4541 
4542   PetscFunctionBegin;
4543   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4544   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4545 
4546   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4547   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4548 
4549   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4550   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4551 
4552   bi     = merge->bi;
4553   bj     = merge->bj;
4554   buf_ri = merge->buf_ri;
4555   buf_rj = merge->buf_rj;
4556 
4557   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4558   owners = merge->rowmap->range;
4559   len_s  = merge->len_s;
4560 
4561   /* send and recv matrix values */
4562   /*-----------------------------*/
4563   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4564   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4565 
4566   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4567   for (proc=0,k=0; proc<size; proc++) {
4568     if (!len_s[proc]) continue;
4569     i    = owners[proc];
4570     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4571     k++;
4572   }
4573 
4574   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4575   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4576   ierr = PetscFree(status);CHKERRQ(ierr);
4577 
4578   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4579   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4580 
4581   /* insert mat values of mpimat */
4582   /*----------------------------*/
4583   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4584   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4585 
4586   for (k=0; k<merge->nrecv; k++) {
4587     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4588     nrows       = *(buf_ri_k[k]);
4589     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4590     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4591   }
4592 
4593   /* set values of ba */
4594   m = merge->rowmap->n;
4595   for (i=0; i<m; i++) {
4596     arow = owners[rank] + i;
4597     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4598     bnzi = bi[i+1] - bi[i];
4599     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4600 
4601     /* add local non-zero vals of this proc's seqmat into ba */
4602     anzi   = ai[arow+1] - ai[arow];
4603     aj     = a->j + ai[arow];
4604     aa     = a->a + ai[arow];
4605     nextaj = 0;
4606     for (j=0; nextaj<anzi; j++) {
4607       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4608         ba_i[j] += aa[nextaj++];
4609       }
4610     }
4611 
4612     /* add received vals into ba */
4613     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4614       /* i-th row */
4615       if (i == *nextrow[k]) {
4616         anzi   = *(nextai[k]+1) - *nextai[k];
4617         aj     = buf_rj[k] + *(nextai[k]);
4618         aa     = abuf_r[k] + *(nextai[k]);
4619         nextaj = 0;
4620         for (j=0; nextaj<anzi; j++) {
4621           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4622             ba_i[j] += aa[nextaj++];
4623           }
4624         }
4625         nextrow[k]++; nextai[k]++;
4626       }
4627     }
4628     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4629   }
4630   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4632 
4633   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4634   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4635   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4636   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4637   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4638   PetscFunctionReturn(0);
4639 }
4640 
4641 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4642 
4643 #undef __FUNCT__
4644 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4645 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4646 {
4647   PetscErrorCode      ierr;
4648   Mat                 B_mpi;
4649   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4650   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4651   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4652   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4653   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4654   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4655   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4656   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4657   MPI_Status          *status;
4658   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4659   PetscBT             lnkbt;
4660   Mat_Merge_SeqsToMPI *merge;
4661   PetscContainer      container;
4662 
4663   PetscFunctionBegin;
4664   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4665 
4666   /* make sure it is a PETSc comm */
4667   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4668   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4670 
4671   ierr = PetscNew(&merge);CHKERRQ(ierr);
4672   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4673 
4674   /* determine row ownership */
4675   /*---------------------------------------------------------*/
4676   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4677   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4678   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4679   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4680   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4681   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4682   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4683 
4684   m      = merge->rowmap->n;
4685   owners = merge->rowmap->range;
4686 
4687   /* determine the number of messages to send, their lengths */
4688   /*---------------------------------------------------------*/
4689   len_s = merge->len_s;
4690 
4691   len          = 0; /* length of buf_si[] */
4692   merge->nsend = 0;
4693   for (proc=0; proc<size; proc++) {
4694     len_si[proc] = 0;
4695     if (proc == rank) {
4696       len_s[proc] = 0;
4697     } else {
4698       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4699       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4700     }
4701     if (len_s[proc]) {
4702       merge->nsend++;
4703       nrows = 0;
4704       for (i=owners[proc]; i<owners[proc+1]; i++) {
4705         if (ai[i+1] > ai[i]) nrows++;
4706       }
4707       len_si[proc] = 2*(nrows+1);
4708       len         += len_si[proc];
4709     }
4710   }
4711 
4712   /* determine the number and length of messages to receive for ij-structure */
4713   /*-------------------------------------------------------------------------*/
4714   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4715   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4716 
4717   /* post the Irecv of j-structure */
4718   /*-------------------------------*/
4719   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4720   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4721 
4722   /* post the Isend of j-structure */
4723   /*--------------------------------*/
4724   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4725 
4726   for (proc=0, k=0; proc<size; proc++) {
4727     if (!len_s[proc]) continue;
4728     i    = owners[proc];
4729     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4730     k++;
4731   }
4732 
4733   /* receives and sends of j-structure are complete */
4734   /*------------------------------------------------*/
4735   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4736   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4737 
4738   /* send and recv i-structure */
4739   /*---------------------------*/
4740   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4741   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4742 
4743   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4744   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4745   for (proc=0,k=0; proc<size; proc++) {
4746     if (!len_s[proc]) continue;
4747     /* form outgoing message for i-structure:
4748          buf_si[0]:                 nrows to be sent
4749                [1:nrows]:           row index (global)
4750                [nrows+1:2*nrows+1]: i-structure index
4751     */
4752     /*-------------------------------------------*/
4753     nrows       = len_si[proc]/2 - 1;
4754     buf_si_i    = buf_si + nrows+1;
4755     buf_si[0]   = nrows;
4756     buf_si_i[0] = 0;
4757     nrows       = 0;
4758     for (i=owners[proc]; i<owners[proc+1]; i++) {
4759       anzi = ai[i+1] - ai[i];
4760       if (anzi) {
4761         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4762         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4763         nrows++;
4764       }
4765     }
4766     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4767     k++;
4768     buf_si += len_si[proc];
4769   }
4770 
4771   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4772   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4773 
4774   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4775   for (i=0; i<merge->nrecv; i++) {
4776     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4777   }
4778 
4779   ierr = PetscFree(len_si);CHKERRQ(ierr);
4780   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4781   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4782   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4783   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4784   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4785   ierr = PetscFree(status);CHKERRQ(ierr);
4786 
4787   /* compute a local seq matrix in each processor */
4788   /*----------------------------------------------*/
4789   /* allocate bi array and free space for accumulating nonzero column info */
4790   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4791   bi[0] = 0;
4792 
4793   /* create and initialize a linked list */
4794   nlnk = N+1;
4795   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4796 
4797   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4798   len  = ai[owners[rank+1]] - ai[owners[rank]];
4799   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4800 
4801   current_space = free_space;
4802 
4803   /* determine symbolic info for each local row */
4804   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4805 
4806   for (k=0; k<merge->nrecv; k++) {
4807     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4808     nrows       = *buf_ri_k[k];
4809     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4810     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4811   }
4812 
4813   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4814   len  = 0;
4815   for (i=0; i<m; i++) {
4816     bnzi = 0;
4817     /* add local non-zero cols of this proc's seqmat into lnk */
4818     arow  = owners[rank] + i;
4819     anzi  = ai[arow+1] - ai[arow];
4820     aj    = a->j + ai[arow];
4821     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4822     bnzi += nlnk;
4823     /* add received col data into lnk */
4824     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4825       if (i == *nextrow[k]) { /* i-th row */
4826         anzi  = *(nextai[k]+1) - *nextai[k];
4827         aj    = buf_rj[k] + *nextai[k];
4828         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4829         bnzi += nlnk;
4830         nextrow[k]++; nextai[k]++;
4831       }
4832     }
4833     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4834 
4835     /* if free space is not available, make more free space */
4836     if (current_space->local_remaining<bnzi) {
4837       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4838       nspacedouble++;
4839     }
4840     /* copy data into free space, then initialize lnk */
4841     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4842     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4843 
4844     current_space->array           += bnzi;
4845     current_space->local_used      += bnzi;
4846     current_space->local_remaining -= bnzi;
4847 
4848     bi[i+1] = bi[i] + bnzi;
4849   }
4850 
4851   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4852 
4853   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4854   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4855   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4856 
4857   /* create symbolic parallel matrix B_mpi */
4858   /*---------------------------------------*/
4859   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4860   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4861   if (n==PETSC_DECIDE) {
4862     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4863   } else {
4864     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4865   }
4866   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4867   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4868   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4869   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4870   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4871 
4872   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4873   B_mpi->assembled    = PETSC_FALSE;
4874   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4875   merge->bi           = bi;
4876   merge->bj           = bj;
4877   merge->buf_ri       = buf_ri;
4878   merge->buf_rj       = buf_rj;
4879   merge->coi          = NULL;
4880   merge->coj          = NULL;
4881   merge->owners_co    = NULL;
4882 
4883   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4884 
4885   /* attach the supporting struct to B_mpi for reuse */
4886   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4887   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4888   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4889   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4890   *mpimat = B_mpi;
4891 
4892   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4893   PetscFunctionReturn(0);
4894 }
4895 
4896 #undef __FUNCT__
4897 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4898 /*@C
4899       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4900                  matrices from each processor
4901 
4902     Collective on MPI_Comm
4903 
4904    Input Parameters:
4905 +    comm - the communicator the parallel matrix will live on
4906 .    seqmat - the input sequential matrix on each process
4907 .    m - number of local rows (or PETSC_DECIDE)
4908 .    n - number of local columns (or PETSC_DECIDE)
4909 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4910 
4911    Output Parameter:
4912 .    mpimat - the parallel matrix generated
4913 
4914     Level: advanced
4915 
4916    Notes:
4917      The dimensions of the sequential matrix in each processor MUST be the same.
4918      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4919      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
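
   A minimal calling sketch (assuming each process has built a SeqAIJ matrix seqmat of identical
   global dimensions):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* after the numerical values of seqmat change (same nonzero pattern) */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve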
4920 @*/
4921 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4922 {
4923   PetscErrorCode ierr;
4924   PetscMPIInt    size;
4925 
4926   PetscFunctionBegin;
4927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4928   if (size == 1) {
4929     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930     if (scall == MAT_INITIAL_MATRIX) {
4931       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4932     } else {
4933       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4934     }
4935     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4936     PetscFunctionReturn(0);
4937   }
4938   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4939   if (scall == MAT_INITIAL_MATRIX) {
4940     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4941   }
4942   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4943   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4944   PetscFunctionReturn(0);
4945 }
4946 
4947 #undef __FUNCT__
4948 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4949 /*@
4950      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4951           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4952           with MatGetSize()
4953 
4954     Not Collective
4955 
4956    Input Parameters:
4957 +    A - the matrix
4958 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4959 
4960    Output Parameter:
4961 .    A_loc - the local sequential matrix generated
4962 
4963     Level: developer
4964 
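   A minimal calling sketch (error checking omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use the SeqAIJ matrix A_loc; after the values of A change it can be refreshed with ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
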
4965 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4966 
4967 @*/
4968 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4969 {
4970   PetscErrorCode ierr;
4971   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4972   Mat_SeqAIJ     *mat,*a,*b;
4973   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4974   MatScalar      *aa,*ba,*cam;
4975   PetscScalar    *ca;
4976   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4977   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4978   PetscBool      match;
4979 
4980   PetscFunctionBegin;
4981   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4982   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4983   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4984   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4985   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4986   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4987   aa = a->a; ba = b->a;
4988   if (scall == MAT_INITIAL_MATRIX) {
4989     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4990     ci[0] = 0;
4991     for (i=0; i<am; i++) {
4992       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4993     }
4994     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4995     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4996     k    = 0;
4997     for (i=0; i<am; i++) {
4998       ncols_o = bi[i+1] - bi[i];
4999       ncols_d = ai[i+1] - ai[i];
5000       /* off-diagonal portion of A */
5001       for (jo=0; jo<ncols_o; jo++) {
5002         col = cmap[*bj];
5003         if (col >= cstart) break;
5004         cj[k]   = col; bj++;
5005         ca[k++] = *ba++;
5006       }
5007       /* diagonal portion of A */
5008       for (j=0; j<ncols_d; j++) {
5009         cj[k]   = cstart + *aj++;
5010         ca[k++] = *aa++;
5011       }
5012       /* off-diagonal portion of A */
5013       for (j=jo; j<ncols_o; j++) {
5014         cj[k]   = cmap[*bj++];
5015         ca[k++] = *ba++;
5016       }
5017     }
5018     /* put together the new matrix */
5019     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5020     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5021     /* Since these are PETSc arrays, change flags to free them as necessary. */
5022     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5023     mat->free_a  = PETSC_TRUE;
5024     mat->free_ij = PETSC_TRUE;
5025     mat->nonew   = 0;
5026   } else if (scall == MAT_REUSE_MATRIX) {
5027     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5028     ci = mat->i; cj = mat->j; cam = mat->a;
5029     for (i=0; i<am; i++) {
5030       /* off-diagonal portion of A */
5031       ncols_o = bi[i+1] - bi[i];
5032       for (jo=0; jo<ncols_o; jo++) {
5033         col = cmap[*bj];
5034         if (col >= cstart) break;
5035         *cam++ = *ba++; bj++;
5036       }
5037       /* diagonal portion of A */
5038       ncols_d = ai[i+1] - ai[i];
5039       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5040       /* off-diagonal portion of A */
5041       for (j=jo; j<ncols_o; j++) {
5042         *cam++ = *ba++; bj++;
5043       }
5044     }
5045   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5046   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 #undef __FUNCT__
5051 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5052 /*@C
5053      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5054 
5055     Not Collective
5056 
5057    Input Parameters:
5058 +    A - the matrix
5059 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5060 -    row, col - index sets of rows and columns to extract (or NULL)
5061 
5062    Output Parameter:
5063 .    A_loc - the local sequential matrix generated
5064 
5065     Level: developer
5066 
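   A minimal calling sketch (extracting all local rows and all nonzero columns; error checking
   omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
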
5067 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5068 
5069 @*/
5070 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5071 {
5072   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5073   PetscErrorCode ierr;
5074   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5075   IS             isrowa,iscola;
5076   Mat            *aloc;
5077   PetscBool      match;
5078 
5079   PetscFunctionBegin;
5080   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5081   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5082   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5083   if (!row) {
5084     start = A->rmap->rstart; end = A->rmap->rend;
5085     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5086   } else {
5087     isrowa = *row;
5088   }
5089   if (!col) {
5090     start = A->cmap->rstart;
5091     cmap  = a->garray;
5092     nzA   = a->A->cmap->n;
5093     nzB   = a->B->cmap->n;
5094     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5095     ncols = 0;
5096     for (i=0; i<nzB; i++) {
5097       if (cmap[i] < start) idx[ncols++] = cmap[i];
5098       else break;
5099     }
5100     imark = i;
5101     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5102     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5103     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5104   } else {
5105     iscola = *col;
5106   }
5107   if (scall != MAT_INITIAL_MATRIX) {
5108     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5109     aloc[0] = *A_loc;
5110   }
5111   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5112   *A_loc = aloc[0];
5113   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5114   if (!row) {
5115     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5116   }
5117   if (!col) {
5118     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5119   }
5120   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5121   PetscFunctionReturn(0);
5122 }
5123 
5124 #undef __FUNCT__
5125 #define __FUNCT__ "MatGetBrowsOfAcols"
5126 /*@C
5127     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5128 
5129     Collective on Mat
5130 
5131    Input Parameters:
5132 +    A,B - the matrices in mpiaij format
5133 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5134 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5135 
5136    Output Parameters:
5137 +    rowb, colb - index sets of rows and columns of B to extract
5138 -    B_seq - the sequential matrix generated
5139 
5140     Level: developer
5141 
5142 @*/
5143 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5144 {
5145   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5146   PetscErrorCode ierr;
5147   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5148   IS             isrowb,iscolb;
5149   Mat            *bseq=NULL;
5150 
5151   PetscFunctionBegin;
5152   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5153     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5154   }
5155   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5156 
5157   if (scall == MAT_INITIAL_MATRIX) {
5158     start = A->cmap->rstart;
5159     cmap  = a->garray;
5160     nzA   = a->A->cmap->n;
5161     nzB   = a->B->cmap->n;
5162     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5163     ncols = 0;
5164     for (i=0; i<nzB; i++) {  /* rows of B below the local column range of A */
5165       if (cmap[i] < start) idx[ncols++] = cmap[i];
5166       else break;
5167     }
5168     imark = i;
5169     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* rows of B in the local column range of A */
5170     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local column range of A */
5171     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5172     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5173   } else {
5174     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5175     isrowb  = *rowb; iscolb = *colb;
5176     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5177     bseq[0] = *B_seq;
5178   }
5179   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5180   *B_seq = bseq[0];
5181   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5182   if (!rowb) {
5183     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5184   } else {
5185     *rowb = isrowb;
5186   }
5187   if (!colb) {
5188     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5189   } else {
5190     *colb = iscolb;
5191   }
5192   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5193   PetscFunctionReturn(0);
5194 }
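/*
   A minimal usage sketch, assuming A and B are assembled MATMPIAIJ matrices with
   compatible layouts (variable names are illustrative).  The index sets created
   by the first call are kept by the caller so the second call can reuse them:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/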
5195 
5196 #undef __FUNCT__
5197 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5198 /*
5199     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5200     of the OFF-DIAGONAL portion of the local part of A
5201 
5202     Collective on Mat
5203 
5204    Input Parameters:
5205 +    A,B - the matrices in mpiaij format
5206 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5207 
5208    Output Parameters:
5209 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5210 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5211 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5212 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5213 
5214     Level: developer
5215 
5216 */
5217 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5218 {
5219   VecScatter_MPI_General *gen_to,*gen_from;
5220   PetscErrorCode         ierr;
5221   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5222   Mat_SeqAIJ             *b_oth;
5223   VecScatter             ctx =a->Mvctx;
5224   MPI_Comm               comm;
5225   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5226   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5227   PetscScalar            *rvalues,*svalues;
5228   MatScalar              *b_otha,*bufa,*bufA;
5229   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5230   MPI_Request            *rwaits = NULL,*swaits = NULL;
5231   MPI_Status             *sstatus,rstatus;
5232   PetscMPIInt            jj;
5233   PetscInt               *cols,sbs,rbs;
5234   PetscScalar            *vals;
5235 
5236   PetscFunctionBegin;
5237   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5238   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5239     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5240   }
5241   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5242   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5243 
5244   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5245   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5246   rvalues  = gen_from->values; /* reused as workspace for the lengths of incoming rows */
5247   svalues  = gen_to->values;   /* reused as workspace for the lengths of outgoing rows */
5248   nrecvs   = gen_from->n;
5249   nsends   = gen_to->n;
5250 
5251   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5252   srow    = gen_to->indices;    /* local row index to be sent */
5253   sstarts = gen_to->starts;
5254   sprocs  = gen_to->procs;
5255   sstatus = gen_to->sstatus;
5256   sbs     = gen_to->bs;
5257   rstarts = gen_from->starts;
5258   rprocs  = gen_from->procs;
5259   rbs     = gen_from->bs;
5260 
5261   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5262   if (scall == MAT_INITIAL_MATRIX) {
5263     /* i-array */
5264     /*---------*/
5265     /*  post receives */
5266     for (i=0; i<nrecvs; i++) {
5267       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5268       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5269       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5270     }
5271 
5272     /* pack the outgoing message */
5273     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5274 
5275     sstartsj[0] = 0;
5276     rstartsj[0] = 0;
5277     len         = 0; /* total length of j or a array to be sent */
5278     k           = 0;
5279     for (i=0; i<nsends; i++) {
5280       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5281       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5282       for (j=0; j<nrows; j++) {
5283         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5284         for (l=0; l<sbs; l++) {
5285           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5286 
5287           rowlen[j*sbs+l] = ncols;
5288 
5289           len += ncols;
5290           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5291         }
5292         k++;
5293       }
5294       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5295 
5296       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5297     }
5298     /* recvs and sends of i-array are completed */
5299     i = nrecvs;
5300     while (i--) {
5301       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5302     }
5303     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5304 
5305     /* allocate buffers for sending j and a arrays */
5306     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5307     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5308 
5309     /* create i-array of B_oth */
5310     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5311 
5312     b_othi[0] = 0;
5313     len       = 0; /* total length of j or a array to be received */
5314     k         = 0;
5315     for (i=0; i<nrecvs; i++) {
5316       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5317       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5318       for (j=0; j<nrows; j++) {
5319         b_othi[k+1] = b_othi[k] + rowlen[j];
5320         len        += rowlen[j]; k++;
5321       }
5322       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5323     }
5324 
5325     /* allocate space for j and a arrays of B_oth */
5326     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5327     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5328 
5329     /* j-array */
5330     /*---------*/
5331     /*  post receives of j-array */
5332     for (i=0; i<nrecvs; i++) {
5333       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5334       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5335     }
5336 
5337     /* pack the outgoing message j-array */
5338     k = 0;
5339     for (i=0; i<nsends; i++) {
5340       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5341       bufJ  = bufj+sstartsj[i];
5342       for (j=0; j<nrows; j++) {
5343         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5344         for (ll=0; ll<sbs; ll++) {
5345           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5346           for (l=0; l<ncols; l++) {
5347             *bufJ++ = cols[l];
5348           }
5349           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5350         }
5351       }
5352       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5353     }
5354 
5355     /* recvs and sends of j-array are completed */
5356     i = nrecvs;
5357     while (i--) {
5358       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5359     }
5360     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5361   } else if (scall == MAT_REUSE_MATRIX) {
5362     sstartsj = *startsj_s;
5363     rstartsj = *startsj_r;
5364     bufa     = *bufa_ptr;
5365     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5366     b_otha   = b_oth->a;
5367   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5368 
5369   /* a-array */
5370   /*---------*/
5371   /*  post receives of a-array */
5372   for (i=0; i<nrecvs; i++) {
5373     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5374     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5375   }
5376 
5377   /* pack the outgoing message a-array */
5378   k = 0;
5379   for (i=0; i<nsends; i++) {
5380     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5381     bufA  = bufa+sstartsj[i];
5382     for (j=0; j<nrows; j++) {
5383       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5384       for (ll=0; ll<sbs; ll++) {
5385         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5386         for (l=0; l<ncols; l++) {
5387           *bufA++ = vals[l];
5388         }
5389         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5390       }
5391     }
5392     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5393   }
5394   /* recvs and sends of a-array are completed */
5395   i = nrecvs;
5396   while (i--) {
5397     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5398   }
5399   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5400   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5401 
5402   if (scall == MAT_INITIAL_MATRIX) {
5403     /* put together the new matrix */
5404     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5405 
5406     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5407     /* Since these are PETSc arrays, change flags to free them as necessary. */
5408     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5409     b_oth->free_a  = PETSC_TRUE;
5410     b_oth->free_ij = PETSC_TRUE;
5411     b_oth->nonew   = 0;
5412 
5413     ierr = PetscFree(bufj);CHKERRQ(ierr);
5414     if (!startsj_s || !bufa_ptr) {
5415       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5416       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so release it here */
5417     } else {
5418       *startsj_s = sstartsj;
5419       *startsj_r = rstartsj;
5420       *bufa_ptr  = bufa;
5421     }
5422   }
5423   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5424   PetscFunctionReturn(0);
5425 }
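/*
   A minimal usage sketch for this developer-level routine (variable names are
   illustrative).  The startsj/bufa arrays produced by the first call are kept by
   the caller so that later calls can reuse the communication pattern and only
   re-send the numerical values:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/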
5426 
5427 #undef __FUNCT__
5428 #define __FUNCT__ "MatGetCommunicationStructs"
5429 /*@C
5430   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5431 
5432   Not Collective
5433 
5434   Input Parameter:
5435 . A - The matrix in mpiaij format
5436 
5437   Output Parameters:
5438 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5439 . colmap - A map from global column index to local index into lvec
5440 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5441 
5442   Level: developer
5443 
5444 @*/
5445 #if defined(PETSC_USE_CTABLE)
5446 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5447 #else
5448 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5449 #endif
5450 {
5451   Mat_MPIAIJ *a;
5452 
5453   PetscFunctionBegin;
5454   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5455   PetscValidPointer(lvec, 2);
5456   PetscValidPointer(colmap, 3);
5457   PetscValidPointer(multScatter, 4);
5458   a = (Mat_MPIAIJ*) A->data;
5459   if (lvec) *lvec = a->lvec;
5460   if (colmap) *colmap = a->colmap;
5461   if (multScatter) *multScatter = a->Mvctx;
5462   PetscFunctionReturn(0);
5463 }
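/*
   A minimal usage sketch (variable names are illustrative).  The returned objects
   are references into the Mat_MPIAIJ structure, so the caller should not destroy
   them:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/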
5464 
5465 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5466 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5467 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5468 
5469 #undef __FUNCT__
5470 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5471 /*
5472     Computes (B'*A')' = A*B since computing A*B directly is untenable; note that (A*B)' = B'*A'
5473 
5474                n                       p                          p
5475         (              )       (              )         (                  )
5476       m (      A       )  *  n (       B      )   =   m (         C        )
5477         (              )       (              )         (                  )
5478 
5479 */
5480 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5481 {
5482   PetscErrorCode ierr;
5483   Mat            At,Bt,Ct;
5484 
5485   PetscFunctionBegin;
5486   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5487   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5488   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5489   ierr = MatDestroy(&At);CHKERRQ(ierr);
5490   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5491   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5492   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5493   PetscFunctionReturn(0);
5494 }
5495 
5496 #undef __FUNCT__
5497 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5498 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5499 {
5500   PetscErrorCode ierr;
5501   PetscInt       m=A->rmap->n,n=B->cmap->n;
5502   Mat            Cmat;
5503 
5504   PetscFunctionBegin;
5505   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5506   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5507   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5508   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5509   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5510   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5511   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5512   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5513 
5514   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5515 
5516   *C = Cmat;
5517   PetscFunctionReturn(0);
5518 }
5519 
5520 /* ----------------------------------------------------------------*/
5521 #undef __FUNCT__
5522 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5523 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5524 {
5525   PetscErrorCode ierr;
5526 
5527   PetscFunctionBegin;
5528   if (scall == MAT_INITIAL_MATRIX) {
5529     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5530     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5531     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5532   }
5533   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5534   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5535   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5536   PetscFunctionReturn(0);
5537 }
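/*
   A minimal usage sketch (variable names are illustrative): with A of type
   MATMPIDENSE and B of type MATMPIAIJ, the generic MatMatMult() interface is
   expected to dispatch to the routines above, producing a MATMPIDENSE result:

     Mat C = NULL;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/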
5538 
5539 #if defined(PETSC_HAVE_MUMPS)
5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5541 #endif
5542 #if defined(PETSC_HAVE_PASTIX)
5543 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5544 #endif
5545 #if defined(PETSC_HAVE_SUPERLU_DIST)
5546 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5547 #endif
5548 #if defined(PETSC_HAVE_CLIQUE)
5549 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5550 #endif
5551 
5552 /*MC
5553    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5554 
5555    Options Database Keys:
5556 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5557 
5558   Level: beginner
5559 
5560 .seealso: MatCreateAIJ()
5561 M*/
5562 
5563 #undef __FUNCT__
5564 #define __FUNCT__ "MatCreate_MPIAIJ"
5565 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5566 {
5567   Mat_MPIAIJ     *b;
5568   PetscErrorCode ierr;
5569   PetscMPIInt    size;
5570 
5571   PetscFunctionBegin;
5572   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5573 
5574   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5575   B->data       = (void*)b;
5576   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5577   B->assembled  = PETSC_FALSE;
5578   B->insertmode = NOT_SET_VALUES;
5579   b->size       = size;
5580 
5581   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5582 
5583   /* build cache for off array entries formed */
5584   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5585 
5586   b->donotstash  = PETSC_FALSE;
5587   b->colmap      = 0;
5588   b->garray      = 0;
5589   b->roworiented = PETSC_TRUE;
5590 
5591   /* stuff used for matrix vector multiply */
5592   b->lvec  = NULL;
5593   b->Mvctx = NULL;
5594 
5595   /* stuff for MatGetRow() */
5596   b->rowindices   = 0;
5597   b->rowvalues    = 0;
5598   b->getrowactive = PETSC_FALSE;
5599 
5600   /* flexible pointer used in CUSP/CUSPARSE classes */
5601   b->spptr = NULL;
5602 
5603 #if defined(PETSC_HAVE_MUMPS)
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5605 #endif
5606 #if defined(PETSC_HAVE_PASTIX)
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5608 #endif
5609 #if defined(PETSC_HAVE_SUPERLU_DIST)
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5611 #endif
5612 #if defined(PETSC_HAVE_CLIQUE)
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5614 #endif
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5628   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5629   PetscFunctionReturn(0);
5630 }
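/*
   A minimal sketch of how this constructor is usually reached (all sizes are
   illustrative): MatSetType() (or -mat_type mpiaij via MatSetFromOptions())
   invokes MatCreate_MPIAIJ(), after which the composed preallocation routine
   registered above can be called:

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/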
5631 
5632 #undef __FUNCT__
5633 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5634 /*@
5635      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5636          and "off-diagonal" part of the matrix in CSR format.
5637 
5638    Collective on MPI_Comm
5639 
5640    Input Parameters:
5641 +  comm - MPI communicator
5642 .  m - number of local rows (Cannot be PETSC_DECIDE)
5643 .  n - This value should be the same as the local size used in creating the
5644        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5645        it calculated if N is given). For square matrices n is almost always m.
5646 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5647 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5648 .   i - row indices for "diagonal" portion of matrix
5649 .   j - column indices
5650 .   a - matrix values
5651 .   oi - row indices for "off-diagonal" portion of matrix
5652 .   oj - column indices
5653 -   oa - matrix values
5654 
5655    Output Parameter:
5656 .   mat - the matrix
5657 
5658    Level: advanced
5659 
5660    Notes:
5661        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5662        must free the arrays once the matrix has been destroyed and not before.
5663 
5664        The i and j indices are 0 based
5665 
5666        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5667 
5668        This sets local rows and cannot be used to set off-processor values.
5669 
5670        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5671        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5672        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5673        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5674        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5675        communication if it is known that only local entries will be set.
5676 
5677 .keywords: matrix, aij, compressed row, sparse, parallel
5678 
5679 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5680           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5681 @*/
5682 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5683 {
5684   PetscErrorCode ierr;
5685   Mat_MPIAIJ     *maij;
5686 
5687   PetscFunctionBegin;
5688   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5689   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5690   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5691   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5692   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5693   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5694   maij = (Mat_MPIAIJ*) (*mat)->data;
5695 
5696   (*mat)->preallocated = PETSC_TRUE;
5697 
5698   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5699   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5700 
5701   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5702   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5703 
5704   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5707   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5708 
5709   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5710   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5711   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5712   PetscFunctionReturn(0);
5713 }
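/*
   A minimal sketch, assuming each process owns two rows of a block-diagonal
   matrix (all arrays and values are illustrative).  As the
   MatCreateSeqAIJWithArrays() calls above suggest, the diagonal-block arrays use
   local column indices while the off-diagonal arrays use global column indices;
   here the off-diagonal part is empty:

     PetscInt    m = 2,n = 2;
     PetscInt    di[3] = {0,1,2},dj[2] = {0,1};
     PetscScalar da[2] = {2.0,2.0};
     PetscInt    oi[3] = {0,0,0},oj[1] = {0};
     PetscScalar oa[1] = {0.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   The arrays are not copied, so they must remain valid until after MatDestroy().
*/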
5714 
5715 /*
5716     Special version for direct calls from Fortran
5717 */
5718 #include <petsc-private/fortranimpl.h>
5719 
5720 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5721 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5722 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5723 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5724 #endif
5725 
5726 /* Change these macros so can be used in void function */
5727 #undef CHKERRQ
5728 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5729 #undef SETERRQ2
5730 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5731 #undef SETERRQ3
5732 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5733 #undef SETERRQ
5734 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5735 
5736 #undef __FUNCT__
5737 #define __FUNCT__ "matsetvaluesmpiaij_"
5738 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5739 {
5740   Mat            mat  = *mmat;
5741   PetscInt       m    = *mm, n = *mn;
5742   InsertMode     addv = *maddv;
5743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5744   PetscScalar    value;
5745   PetscErrorCode ierr;
5746 
5747   MatCheckPreallocated(mat,1);
5748   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5749 
5750 #if defined(PETSC_USE_DEBUG)
5751   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5752 #endif
5753   {
5754     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5755     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5756     PetscBool roworiented = aij->roworiented;
5757 
5758     /* Some Variables required in the macro */
5759     Mat        A                 = aij->A;
5760     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5761     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5762     MatScalar  *aa               = a->a;
5763     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5764     Mat        B                 = aij->B;
5765     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5766     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5767     MatScalar  *ba               = b->a;
5768 
5769     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5770     PetscInt  nonew = a->nonew;
5771     MatScalar *ap1,*ap2;
5772 
5773     PetscFunctionBegin;
5774     for (i=0; i<m; i++) {
5775       if (im[i] < 0) continue;
5776 #if defined(PETSC_USE_DEBUG)
5777       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5778 #endif
5779       if (im[i] >= rstart && im[i] < rend) {
5780         row      = im[i] - rstart;
5781         lastcol1 = -1;
5782         rp1      = aj + ai[row];
5783         ap1      = aa + ai[row];
5784         rmax1    = aimax[row];
5785         nrow1    = ailen[row];
5786         low1     = 0;
5787         high1    = nrow1;
5788         lastcol2 = -1;
5789         rp2      = bj + bi[row];
5790         ap2      = ba + bi[row];
5791         rmax2    = bimax[row];
5792         nrow2    = bilen[row];
5793         low2     = 0;
5794         high2    = nrow2;
5795 
5796         for (j=0; j<n; j++) {
5797           if (roworiented) value = v[i*n+j];
5798           else value = v[i+j*m];
5799           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5800           if (in[j] >= cstart && in[j] < cend) {
5801             col = in[j] - cstart;
5802             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5803           } else if (in[j] < 0) continue;
5804 #if defined(PETSC_USE_DEBUG)
5805           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5806 #endif
5807           else {
5808             if (mat->was_assembled) {
5809               if (!aij->colmap) {
5810                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5811               }
5812 #if defined(PETSC_USE_CTABLE)
5813               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5814               col--;
5815 #else
5816               col = aij->colmap[in[j]] - 1;
5817 #endif
5818               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5819                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5820                 col  =  in[j];
5821                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5822                 B     = aij->B;
5823                 b     = (Mat_SeqAIJ*)B->data;
5824                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5825                 rp2   = bj + bi[row];
5826                 ap2   = ba + bi[row];
5827                 rmax2 = bimax[row];
5828                 nrow2 = bilen[row];
5829                 low2  = 0;
5830                 high2 = nrow2;
5831                 bm    = aij->B->rmap->n;
5832                 ba    = b->a;
5833               }
5834             } else col = in[j];
5835             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5836           }
5837         }
5838       } else if (!aij->donotstash) {
5839         if (roworiented) {
5840           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5841         } else {
5842           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5843         }
5844       }
5845     }
5846   }
5847   PetscFunctionReturnVoid();
5848 }
5849 
5850