xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dd065a40dfdbddb790912f9737a8fa03de5a861b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     if (redund->matseq) { /* '-new' option */
2530       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2531       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2532       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2533       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2534     } else {
2535       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2537       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2538       for (i=0; i<redund->nrecvs; i++) {
2539         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2540         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2541       }
2542       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2543 
2544       if (redund->psubcomm) {
2545         ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2546       }
2547     }
2548 
2549     ierr = redund->Destroy(A);CHKERRQ(ierr);
2550     ierr = PetscFree(redund);CHKERRQ(ierr);
2551   }
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2557 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2558 {
2559   PetscMPIInt    rank,size;
2560   MPI_Comm       comm,subcomm=psubcomm->comm;
2561   PetscErrorCode ierr;
2562   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2563   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2564   PetscInt       *rowrange = mat->rmap->range;
2565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2566   Mat            A = aij->A,B=aij->B,C=*matredundant;
2567   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2568   PetscScalar    *sbuf_a;
2569   PetscInt       nzlocal=a->nz+b->nz;
2570   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2571   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2572   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2573   MatScalar      *aworkA,*aworkB;
2574   PetscScalar    *vals;
2575   PetscMPIInt    tag1,tag2,tag3,imdex;
2576   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2577   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2578   MPI_Status     recv_status,*send_status;
2579   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2580   PetscInt       **rbuf_j=NULL;
2581   PetscScalar    **rbuf_a=NULL;
2582   Mat_Redundant  *redund =NULL;
2583   PetscBool      flg=PETSC_FALSE;
2584 
2585   PetscFunctionBegin;
2586   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2589   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2590   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2591 
2592   /* ---------- new imples: use MatGetSubMatrices() ------------*/
2593   ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2594   if (flg) {
2595     Mat        *matseq;
2596     IS         isrow,iscol;
2597     PetscInt   mloc_sub,rstart,rend;
2598 
2599     if (reuse == MAT_INITIAL_MATRIX) {
2600       /* create a local sequential matrix matseq[0] */
2601       mloc_sub = PETSC_DECIDE;
2602       ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2603       ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2604       rstart = rend - mloc_sub;
2605       /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */
2606       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2607       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2608     } else { /* reuse == MAT_REUSE_MATRIX */
2609       if (subsize == 1) {
2610         Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2611         redund = c->redundant;
2612       } else {
2613         Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2614         redund = c->redundant;
2615       }
2616 
2617       isrow = redund->isrow;
2618       iscol = redund->iscol;
2619       matseq = redund->matseq;
2620     }
2621 
2622     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2623     /*
2624     if (rank==0) {
2625       printf("[%d] matsub:\n",rank);
2626       ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2627     }
2628     ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2629      */
2630 
2631     /* Create matredundant by concatenating matseq[0] from processors in this subcomm */
2632     /*
2633     if (reuse == MAT_REUSE_MATRIX) {
2634       if (!rank) printf("matredundant:\n");
2635       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2636     }
2637      */
2638     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2639     /*
2640     if (nsubcomm == 1) {
2641       if (!rank) printf( "matredundant\n");
2642       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2643      } */
2644 
2645     if (reuse == MAT_INITIAL_MATRIX) {
2646       /* create a supporting struct and attach it to C for reuse */
2647       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2648       if (subsize == 1) {
2649         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2650         c->redundant = redund;
2651       } else {
2652         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2653         c->redundant = redund;
2654       }
2655 
2656       redund->isrow     = isrow;
2657       redund->iscol     = iscol;
2658       redund->matseq    = matseq;
2659       redund->psubcomm  = NULL;
2660 
2661       redund->Destroy               = (*matredundant)->ops->destroy;
2662       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2663     }
2664     PetscFunctionReturn(0);
2665   }
2666   /* ----------------------------------------------------*/
2667 
2668   if (reuse == MAT_REUSE_MATRIX) {
2669     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2670     if (subsize == 1) {
2671       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2672       redund = c->redundant;
2673     } else {
2674       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2675       redund = c->redundant;
2676     }
2677     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2678 
2679     nsends    = redund->nsends;
2680     nrecvs    = redund->nrecvs;
2681     send_rank = redund->send_rank;
2682     recv_rank = redund->recv_rank;
2683     sbuf_nz   = redund->sbuf_nz;
2684     rbuf_nz   = redund->rbuf_nz;
2685     sbuf_j    = redund->sbuf_j;
2686     sbuf_a    = redund->sbuf_a;
2687     rbuf_j    = redund->rbuf_j;
2688     rbuf_a    = redund->rbuf_a;
2689   }
2690 
2691   if (reuse == MAT_INITIAL_MATRIX) {
2692     PetscInt    nleftover,np_subcomm;
2693 
2694     /* get the destination processors' id send_rank, nsends and nrecvs */
2695     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2696 
2697     np_subcomm = size/nsubcomm;
2698     nleftover  = size - nsubcomm*np_subcomm;
2699 
2700     nsends = 0; nrecvs = 0;
2701     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2702       /* -------------------------------------------*/
2703       for (i=0; i<size; i++) {
2704         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2705           send_rank[nsends]   = i; nsends++;
2706           recv_rank[nrecvs++] = i;
2707           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2708         }
2709       }
2710       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2711         i = size-nleftover-1;
2712         j = 0;
2713         while (j < nsubcomm - nleftover) {
2714           send_rank[nsends++] = i;
2715           i--; j++;
2716           /* printf("[%d] send to [%d]\n",rank,i); */
2717         }
2718       }
2719 
2720       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2721         for (i=0; i<nleftover; i++) {
2722           recv_rank[nrecvs++] = size-nleftover+i;
2723           /* printf("[%d] recv from [%d]\n",rank,i); */
2724         }
2725       }
2726     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2727       /* --------------------------------------------------*/
2728       PetscInt color,subcommstart;
2729       subcommstart=0;
2730       for (color=0; color<nsubcomm; color++) {
2731         if (psubcomm->color != color) {
2732           for (i=0; i<psubcomm->subsize[color]; i++) {
2733             if (subrank == i) { /* my_subrank == other's subrank */
2734               send_rank[nsends++] = subcommstart+i;
2735               recv_rank[nrecvs++] = subcommstart+i;
2736               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2737             }
2738           }
2739         }
2740         subcommstart += psubcomm->subsize[color];
2741       }
2742       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2743         subcommstart=0;
2744         for (color=0; color<nsubcomm; color++) {
2745           subcommstart += psubcomm->subsize[color];
2746           if (psubcomm->color == color) continue;
2747           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2748             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2749             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2750           }
2751         }
2752       }
2753 
2754       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2755         subcommstart=0;
2756         for (color=0; color<nsubcomm; color++) {
2757           subcommstart += psubcomm->subsize[color];
2758           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2759             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2760             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2761           }
2762         }
2763       }
2764     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2765 
2766     /* allocate sbuf_j, sbuf_a */
2767     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2768     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2769     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2770     /*
2771     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2772     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2773      */
2774   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2775 
2776   /* copy mat's local entries into the buffers */
2777   if (reuse == MAT_INITIAL_MATRIX) {
2778     rownz_max = 0;
2779     rptr      = sbuf_j;
2780     cols      = sbuf_j + rend-rstart + 1;
2781     vals      = sbuf_a;
2782     rptr[0]   = 0;
2783     for (i=0; i<rend-rstart; i++) {
2784       row    = i + rstart;
2785       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2786       ncols  = nzA + nzB;
2787       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2788       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2789       /* load the column indices for this row into cols */
2790       lwrite = 0;
2791       for (l=0; l<nzB; l++) {
2792         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2793           vals[lwrite]   = aworkB[l];
2794           cols[lwrite++] = ctmp;
2795         }
2796       }
2797       for (l=0; l<nzA; l++) {
2798         vals[lwrite]   = aworkA[l];
2799         cols[lwrite++] = cstart + cworkA[l];
2800       }
2801       for (l=0; l<nzB; l++) {
2802         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2803           vals[lwrite]   = aworkB[l];
2804           cols[lwrite++] = ctmp;
2805         }
2806       }
2807       vals     += ncols;
2808       cols     += ncols;
2809       rptr[i+1] = rptr[i] + ncols;
2810       if (rownz_max < ncols) rownz_max = ncols;
2811     }
2812     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2813   } else { /* only copy matrix values into sbuf_a */
2814     rptr    = sbuf_j;
2815     vals    = sbuf_a;
2816     rptr[0] = 0;
2817     for (i=0; i<rend-rstart; i++) {
2818       row    = i + rstart;
2819       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2820       ncols  = nzA + nzB;
2821       cworkB = b->j + b->i[i];
2822       aworkA = a->a + a->i[i];
2823       aworkB = b->a + b->i[i];
2824       lwrite = 0;
2825       for (l=0; l<nzB; l++) {
2826         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2827       }
2828       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2829       for (l=0; l<nzB; l++) {
2830         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2831       }
2832       vals     += ncols;
2833       rptr[i+1] = rptr[i] + ncols;
2834     }
2835   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2836 
2837   /* send nzlocal to others, and recv other's nzlocal */
2838   /*--------------------------------------------------*/
2839   if (reuse == MAT_INITIAL_MATRIX) {
2840     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2841 
2842     s_waits2 = s_waits3 + nsends;
2843     s_waits1 = s_waits2 + nsends;
2844     r_waits1 = s_waits1 + nsends;
2845     r_waits2 = r_waits1 + nrecvs;
2846     r_waits3 = r_waits2 + nrecvs;
2847   } else {
2848     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2849 
2850     r_waits3 = s_waits3 + nsends;
2851   }
2852 
2853   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2854   if (reuse == MAT_INITIAL_MATRIX) {
2855     /* get new tags to keep the communication clean */
2856     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2857     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2858     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2859 
2860     /* post receives of other's nzlocal */
2861     for (i=0; i<nrecvs; i++) {
2862       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2863     }
2864     /* send nzlocal to others */
2865     for (i=0; i<nsends; i++) {
2866       sbuf_nz[i] = nzlocal;
2867       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2868     }
2869     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2870     count = nrecvs;
2871     while (count) {
2872       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2873 
2874       recv_rank[imdex] = recv_status.MPI_SOURCE;
2875       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2876       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2877 
2878       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2879 
2880       rbuf_nz[imdex] += i + 2;
2881 
2882       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2883       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2884       count--;
2885     }
2886     /* wait on sends of nzlocal */
2887     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2888     /* send mat->i,j to others, and recv from other's */
2889     /*------------------------------------------------*/
2890     for (i=0; i<nsends; i++) {
2891       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2892       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2893     }
2894     /* wait on receives of mat->i,j */
2895     /*------------------------------*/
2896     count = nrecvs;
2897     while (count) {
2898       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2899       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2900       count--;
2901     }
2902     /* wait on sends of mat->i,j */
2903     /*---------------------------*/
2904     if (nsends) {
2905       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2906     }
2907   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2908 
2909   /* post receives, send and receive mat->a */
2910   /*----------------------------------------*/
2911   for (imdex=0; imdex<nrecvs; imdex++) {
2912     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2913   }
2914   for (i=0; i<nsends; i++) {
2915     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2916   }
2917   count = nrecvs;
2918   while (count) {
2919     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2920     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2921     count--;
2922   }
2923   if (nsends) {
2924     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2925   }
2926 
2927   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2928 
2929   /* create redundant matrix */
2930   /*-------------------------*/
2931   if (reuse == MAT_INITIAL_MATRIX) {
2932     const PetscInt *range;
2933     PetscInt       rstart_sub,rend_sub,mloc_sub;
2934 
2935     /* compute rownz_max for preallocation */
2936     for (imdex=0; imdex<nrecvs; imdex++) {
2937       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2938       rptr = rbuf_j[imdex];
2939       for (i=0; i<j; i++) {
2940         ncols = rptr[i+1] - rptr[i];
2941         if (rownz_max < ncols) rownz_max = ncols;
2942       }
2943     }
2944 
2945     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2946 
2947     /* get local size of redundant matrix
2948        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2949     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2950     rstart_sub = range[nsubcomm*subrank];
2951     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2952       rend_sub = range[nsubcomm*(subrank+1)];
2953     } else {
2954       rend_sub = mat->rmap->N;
2955     }
2956     mloc_sub = rend_sub - rstart_sub;
2957 
2958     if (M == N) {
2959       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2960     } else { /* non-square matrix */
2961       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2962     }
2963     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2964     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2965     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2966     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2967   } else {
2968     C = *matredundant;
2969   }
2970 
2971   /* insert local matrix entries */
2972   rptr = sbuf_j;
2973   cols = sbuf_j + rend-rstart + 1;
2974   vals = sbuf_a;
2975   for (i=0; i<rend-rstart; i++) {
2976     row   = i + rstart;
2977     ncols = rptr[i+1] - rptr[i];
2978     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2979     vals += ncols;
2980     cols += ncols;
2981   }
2982   /* insert received matrix entries */
2983   for (imdex=0; imdex<nrecvs; imdex++) {
2984     rstart = rowrange[recv_rank[imdex]];
2985     rend   = rowrange[recv_rank[imdex]+1];
2986     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2987     rptr   = rbuf_j[imdex];
2988     cols   = rbuf_j[imdex] + rend-rstart + 1;
2989     vals   = rbuf_a[imdex];
2990     for (i=0; i<rend-rstart; i++) {
2991       row   = i + rstart;
2992       ncols = rptr[i+1] - rptr[i];
2993       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2994       vals += ncols;
2995       cols += ncols;
2996     }
2997   }
2998   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3000 
3001   if (reuse == MAT_INITIAL_MATRIX) {
3002     *matredundant = C;
3003 
3004     /* create a supporting struct and attach it to C for reuse */
3005     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3006     if (subsize == 1) {
3007       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
3008       c->redundant = redund;
3009     } else {
3010       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
3011       c->redundant = redund;
3012     }
3013 
3014     redund->nzlocal   = nzlocal;
3015     redund->nsends    = nsends;
3016     redund->nrecvs    = nrecvs;
3017     redund->send_rank = send_rank;
3018     redund->recv_rank = recv_rank;
3019     redund->sbuf_nz   = sbuf_nz;
3020     redund->rbuf_nz   = rbuf_nz;
3021     redund->sbuf_j    = sbuf_j;
3022     redund->sbuf_a    = sbuf_a;
3023     redund->rbuf_j    = rbuf_j;
3024     redund->rbuf_a    = rbuf_a;
3025     redund->psubcomm  = NULL;
3026 
3027     redund->Destroy = C->ops->destroy;
3028     C->ops->destroy = MatDestroy_MatRedundant;
3029   }
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
3035 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
3036 {
3037   PetscErrorCode ierr;
3038 
3039   PetscFunctionBegin;
3040   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
3041     MPI_Comm       comm;
3042     PetscSubcomm   psubcomm;
3043     PetscMPIInt    size,subsize;
3044 
3045     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3046     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3047     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
3048     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
3049     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
3050     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
3051 
3052     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3053 
3054     /* free psubcomm in MatDestroy_MatRedundant() */
3055     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3056     if (subsize == 1) {
3057       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3058       c->redundant->psubcomm = psubcomm;
3059     } else {
3060       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3061       c->redundant->psubcomm = psubcomm ;
3062     }
3063   } else {
3064     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
3065   }
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3071 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3072 {
3073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3074   PetscErrorCode ierr;
3075   PetscInt       i,*idxb = 0;
3076   PetscScalar    *va,*vb;
3077   Vec            vtmp;
3078 
3079   PetscFunctionBegin;
3080   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3081   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3082   if (idx) {
3083     for (i=0; i<A->rmap->n; i++) {
3084       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3085     }
3086   }
3087 
3088   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3089   if (idx) {
3090     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3091   }
3092   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3093   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3094 
3095   for (i=0; i<A->rmap->n; i++) {
3096     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3097       va[i] = vb[i];
3098       if (idx) idx[i] = a->garray[idxb[i]];
3099     }
3100   }
3101 
3102   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3103   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3104   ierr = PetscFree(idxb);CHKERRQ(ierr);
3105   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3106   PetscFunctionReturn(0);
3107 }
3108 
3109 #undef __FUNCT__
3110 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3111 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3112 {
3113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3114   PetscErrorCode ierr;
3115   PetscInt       i,*idxb = 0;
3116   PetscScalar    *va,*vb;
3117   Vec            vtmp;
3118 
3119   PetscFunctionBegin;
3120   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3121   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3122   if (idx) {
3123     for (i=0; i<A->cmap->n; i++) {
3124       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3125     }
3126   }
3127 
3128   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3129   if (idx) {
3130     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3131   }
3132   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3133   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3134 
3135   for (i=0; i<A->rmap->n; i++) {
3136     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3137       va[i] = vb[i];
3138       if (idx) idx[i] = a->garray[idxb[i]];
3139     }
3140   }
3141 
3142   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3143   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3144   ierr = PetscFree(idxb);CHKERRQ(ierr);
3145   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3146   PetscFunctionReturn(0);
3147 }
3148 
3149 #undef __FUNCT__
3150 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3151 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3152 {
3153   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3154   PetscInt       n      = A->rmap->n;
3155   PetscInt       cstart = A->cmap->rstart;
3156   PetscInt       *cmap  = mat->garray;
3157   PetscInt       *diagIdx, *offdiagIdx;
3158   Vec            diagV, offdiagV;
3159   PetscScalar    *a, *diagA, *offdiagA;
3160   PetscInt       r;
3161   PetscErrorCode ierr;
3162 
3163   PetscFunctionBegin;
3164   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3165   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3166   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3167   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3168   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3169   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3170   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3171   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3172   for (r = 0; r < n; ++r) {
3173     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3174       a[r]   = diagA[r];
3175       idx[r] = cstart + diagIdx[r];
3176     } else {
3177       a[r]   = offdiagA[r];
3178       idx[r] = cmap[offdiagIdx[r]];
3179     }
3180   }
3181   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3182   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3183   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3184   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3185   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3186   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3187   PetscFunctionReturn(0);
3188 }
3189 
3190 #undef __FUNCT__
3191 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3192 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3193 {
3194   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3195   PetscInt       n      = A->rmap->n;
3196   PetscInt       cstart = A->cmap->rstart;
3197   PetscInt       *cmap  = mat->garray;
3198   PetscInt       *diagIdx, *offdiagIdx;
3199   Vec            diagV, offdiagV;
3200   PetscScalar    *a, *diagA, *offdiagA;
3201   PetscInt       r;
3202   PetscErrorCode ierr;
3203 
3204   PetscFunctionBegin;
3205   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3206   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3207   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3208   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3209   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3210   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3211   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3212   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3213   for (r = 0; r < n; ++r) {
3214     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3215       a[r]   = diagA[r];
3216       idx[r] = cstart + diagIdx[r];
3217     } else {
3218       a[r]   = offdiagA[r];
3219       idx[r] = cmap[offdiagIdx[r]];
3220     }
3221   }
3222   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3223   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3224   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3225   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3226   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3227   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 #undef __FUNCT__
3232 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3233 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3234 {
3235   PetscErrorCode ierr;
3236   Mat            *dummy;
3237 
3238   PetscFunctionBegin;
3239   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3240   *newmat = *dummy;
3241   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3246 
3247 #undef __FUNCT__
3248 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3249 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3250 {
3251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3261 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3262 {
3263   PetscErrorCode ierr;
3264   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3265 
3266   PetscFunctionBegin;
3267   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3268   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3269   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3270   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 /* -------------------------------------------------------------------*/
3275 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3276                                        MatGetRow_MPIAIJ,
3277                                        MatRestoreRow_MPIAIJ,
3278                                        MatMult_MPIAIJ,
3279                                 /* 4*/ MatMultAdd_MPIAIJ,
3280                                        MatMultTranspose_MPIAIJ,
3281                                        MatMultTransposeAdd_MPIAIJ,
3282 #if defined(PETSC_HAVE_PBGL)
3283                                        MatSolve_MPIAIJ,
3284 #else
3285                                        0,
3286 #endif
3287                                        0,
3288                                        0,
3289                                 /*10*/ 0,
3290                                        0,
3291                                        0,
3292                                        MatSOR_MPIAIJ,
3293                                        MatTranspose_MPIAIJ,
3294                                 /*15*/ MatGetInfo_MPIAIJ,
3295                                        MatEqual_MPIAIJ,
3296                                        MatGetDiagonal_MPIAIJ,
3297                                        MatDiagonalScale_MPIAIJ,
3298                                        MatNorm_MPIAIJ,
3299                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3300                                        MatAssemblyEnd_MPIAIJ,
3301                                        MatSetOption_MPIAIJ,
3302                                        MatZeroEntries_MPIAIJ,
3303                                 /*24*/ MatZeroRows_MPIAIJ,
3304                                        0,
3305 #if defined(PETSC_HAVE_PBGL)
3306                                        0,
3307 #else
3308                                        0,
3309 #endif
3310                                        0,
3311                                        0,
3312                                 /*29*/ MatSetUp_MPIAIJ,
3313 #if defined(PETSC_HAVE_PBGL)
3314                                        0,
3315 #else
3316                                        0,
3317 #endif
3318                                        0,
3319                                        0,
3320                                        0,
3321                                 /*34*/ MatDuplicate_MPIAIJ,
3322                                        0,
3323                                        0,
3324                                        0,
3325                                        0,
3326                                 /*39*/ MatAXPY_MPIAIJ,
3327                                        MatGetSubMatrices_MPIAIJ,
3328                                        MatIncreaseOverlap_MPIAIJ,
3329                                        MatGetValues_MPIAIJ,
3330                                        MatCopy_MPIAIJ,
3331                                 /*44*/ MatGetRowMax_MPIAIJ,
3332                                        MatScale_MPIAIJ,
3333                                        0,
3334                                        0,
3335                                        MatZeroRowsColumns_MPIAIJ,
3336                                 /*49*/ MatSetRandom_MPIAIJ,
3337                                        0,
3338                                        0,
3339                                        0,
3340                                        0,
3341                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3342                                        0,
3343                                        MatSetUnfactored_MPIAIJ,
3344                                        MatPermute_MPIAIJ,
3345                                        0,
3346                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3347                                        MatDestroy_MPIAIJ,
3348                                        MatView_MPIAIJ,
3349                                        0,
3350                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3351                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3352                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3353                                        0,
3354                                        0,
3355                                        0,
3356                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3357                                        MatGetRowMinAbs_MPIAIJ,
3358                                        0,
3359                                        MatSetColoring_MPIAIJ,
3360                                        0,
3361                                        MatSetValuesAdifor_MPIAIJ,
3362                                 /*75*/ MatFDColoringApply_AIJ,
3363                                        0,
3364                                        0,
3365                                        0,
3366                                        MatFindZeroDiagonals_MPIAIJ,
3367                                 /*80*/ 0,
3368                                        0,
3369                                        0,
3370                                 /*83*/ MatLoad_MPIAIJ,
3371                                        0,
3372                                        0,
3373                                        0,
3374                                        0,
3375                                        0,
3376                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3377                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3378                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3379                                        MatPtAP_MPIAIJ_MPIAIJ,
3380                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3381                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3382                                        0,
3383                                        0,
3384                                        0,
3385                                        0,
3386                                 /*99*/ 0,
3387                                        0,
3388                                        0,
3389                                        MatConjugate_MPIAIJ,
3390                                        0,
3391                                 /*104*/MatSetValuesRow_MPIAIJ,
3392                                        MatRealPart_MPIAIJ,
3393                                        MatImaginaryPart_MPIAIJ,
3394                                        0,
3395                                        0,
3396                                 /*109*/0,
3397                                        MatGetRedundantMatrix_MPIAIJ,
3398                                        MatGetRowMin_MPIAIJ,
3399                                        0,
3400                                        0,
3401                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3402                                        0,
3403                                        0,
3404                                        0,
3405                                        0,
3406                                 /*119*/0,
3407                                        0,
3408                                        0,
3409                                        0,
3410                                        MatGetMultiProcBlock_MPIAIJ,
3411                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3412                                        MatGetColumnNorms_MPIAIJ,
3413                                        MatInvertBlockDiagonal_MPIAIJ,
3414                                        0,
3415                                        MatGetSubMatricesParallel_MPIAIJ,
3416                                 /*129*/0,
3417                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3418                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3419                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3420                                        0,
3421                                 /*134*/0,
3422                                        0,
3423                                        0,
3424                                        0,
3425                                        0,
3426                                 /*139*/0,
3427                                        0
3428 };
3429 
3430 /* ----------------------------------------------------------------------------------------*/
3431 
3432 #undef __FUNCT__
3433 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3434 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3435 {
3436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3437   PetscErrorCode ierr;
3438 
3439   PetscFunctionBegin;
3440   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3441   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3442   PetscFunctionReturn(0);
3443 }
3444 
3445 #undef __FUNCT__
3446 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3447 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3448 {
3449   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3450   PetscErrorCode ierr;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3454   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3455   PetscFunctionReturn(0);
3456 }
3457 
3458 #undef __FUNCT__
3459 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3460 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3461 {
3462   Mat_MPIAIJ     *b;
3463   PetscErrorCode ierr;
3464 
3465   PetscFunctionBegin;
3466   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3467   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3468   b = (Mat_MPIAIJ*)B->data;
3469 
3470   if (!B->preallocated) {
3471     /* Explicitly create 2 MATSEQAIJ matrices. */
3472     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3473     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3474     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3475     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3476     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3477     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3478     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3479     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3480     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3481     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3482   }
3483 
3484   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3485   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3486   B->preallocated = PETSC_TRUE;
3487   PetscFunctionReturn(0);
3488 }
3489 
3490 #undef __FUNCT__
3491 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3492 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3493 {
3494   Mat            mat;
3495   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3496   PetscErrorCode ierr;
3497 
3498   PetscFunctionBegin;
3499   *newmat = 0;
3500   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3501   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3502   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3503   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3504   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3505   a       = (Mat_MPIAIJ*)mat->data;
3506 
3507   mat->factortype   = matin->factortype;
3508   mat->rmap->bs     = matin->rmap->bs;
3509   mat->cmap->bs     = matin->cmap->bs;
3510   mat->assembled    = PETSC_TRUE;
3511   mat->insertmode   = NOT_SET_VALUES;
3512   mat->preallocated = PETSC_TRUE;
3513 
3514   a->size         = oldmat->size;
3515   a->rank         = oldmat->rank;
3516   a->donotstash   = oldmat->donotstash;
3517   a->roworiented  = oldmat->roworiented;
3518   a->rowindices   = 0;
3519   a->rowvalues    = 0;
3520   a->getrowactive = PETSC_FALSE;
3521 
3522   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3523   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3524 
3525   if (oldmat->colmap) {
3526 #if defined(PETSC_USE_CTABLE)
3527     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3528 #else
3529     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3530     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3531     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3532 #endif
3533   } else a->colmap = 0;
3534   if (oldmat->garray) {
3535     PetscInt len;
3536     len  = oldmat->B->cmap->n;
3537     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3538     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3539     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3540   } else a->garray = 0;
3541 
3542   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3543   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3544   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3545   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3546   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3547   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3548   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3549   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3550   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3551   *newmat = mat;
3552   PetscFunctionReturn(0);
3553 }
3554 
3555 
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatLoad_MPIAIJ"
3559 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3560 {
3561   PetscScalar    *vals,*svals;
3562   MPI_Comm       comm;
3563   PetscErrorCode ierr;
3564   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3565   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3566   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3567   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3568   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3569   int            fd;
3570   PetscInt       bs = 1;
3571 
3572   PetscFunctionBegin;
3573   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3574   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3575   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3576   if (!rank) {
3577     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3578     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3579     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3580   }
3581 
3582   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3583   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3584   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3585 
3586   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3587 
3588   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3589   M    = header[1]; N = header[2];
3590   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3591   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3592   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3593 
3594   /* If global sizes are set, check if they are consistent with that given in the file */
3595   if (sizesset) {
3596     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3597   }
3598   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3599   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3600 
3601   /* determine ownership of all (block) rows */
3602   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3603   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3604   else m = newMat->rmap->n; /* Set by user */
3605 
3606   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3607   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3608 
3609   /* First process needs enough room for process with most rows */
3610   if (!rank) {
3611     mmax = rowners[1];
3612     for (i=2; i<=size; i++) {
3613       mmax = PetscMax(mmax, rowners[i]);
3614     }
3615   } else mmax = -1;             /* unused, but compilers complain */
3616 
3617   rowners[0] = 0;
3618   for (i=2; i<=size; i++) {
3619     rowners[i] += rowners[i-1];
3620   }
3621   rstart = rowners[rank];
3622   rend   = rowners[rank+1];
3623 
3624   /* distribute row lengths to all processors */
3625   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3626   if (!rank) {
3627     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3628     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3629     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3630     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3631     for (j=0; j<m; j++) {
3632       procsnz[0] += ourlens[j];
3633     }
3634     for (i=1; i<size; i++) {
3635       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3636       /* calculate the number of nonzeros on each processor */
3637       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3638         procsnz[i] += rowlengths[j];
3639       }
3640       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3641     }
3642     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3643   } else {
3644     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3645   }
3646 
3647   if (!rank) {
3648     /* determine max buffer needed and allocate it */
3649     maxnz = 0;
3650     for (i=0; i<size; i++) {
3651       maxnz = PetscMax(maxnz,procsnz[i]);
3652     }
3653     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3654 
3655     /* read in my part of the matrix column indices  */
3656     nz   = procsnz[0];
3657     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3658     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3659 
3660     /* read in every one elses and ship off */
3661     for (i=1; i<size; i++) {
3662       nz   = procsnz[i];
3663       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3664       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3665     }
3666     ierr = PetscFree(cols);CHKERRQ(ierr);
3667   } else {
3668     /* determine buffer space needed for message */
3669     nz = 0;
3670     for (i=0; i<m; i++) {
3671       nz += ourlens[i];
3672     }
3673     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3674 
3675     /* receive message of column indices*/
3676     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3677   }
3678 
3679   /* determine column ownership if matrix is not square */
3680   if (N != M) {
3681     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3682     else n = newMat->cmap->n;
3683     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3684     cstart = cend - n;
3685   } else {
3686     cstart = rstart;
3687     cend   = rend;
3688     n      = cend - cstart;
3689   }
3690 
3691   /* loop over local rows, determining number of off diagonal entries */
3692   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3693   jj   = 0;
3694   for (i=0; i<m; i++) {
3695     for (j=0; j<ourlens[i]; j++) {
3696       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3697       jj++;
3698     }
3699   }
3700 
3701   for (i=0; i<m; i++) {
3702     ourlens[i] -= offlens[i];
3703   }
3704   if (!sizesset) {
3705     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3706   }
3707 
3708   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3709 
3710   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3711 
3712   for (i=0; i<m; i++) {
3713     ourlens[i] += offlens[i];
3714   }
3715 
3716   if (!rank) {
3717     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3718 
3719     /* read in my part of the matrix numerical values  */
3720     nz   = procsnz[0];
3721     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3722 
3723     /* insert into matrix */
3724     jj      = rstart;
3725     smycols = mycols;
3726     svals   = vals;
3727     for (i=0; i<m; i++) {
3728       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3729       smycols += ourlens[i];
3730       svals   += ourlens[i];
3731       jj++;
3732     }
3733 
3734     /* read in other processors and ship out */
3735     for (i=1; i<size; i++) {
3736       nz   = procsnz[i];
3737       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3738       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3739     }
3740     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3741   } else {
3742     /* receive numeric values */
3743     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3744 
3745     /* receive message of values*/
3746     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3747 
3748     /* insert into matrix */
3749     jj      = rstart;
3750     smycols = mycols;
3751     svals   = vals;
3752     for (i=0; i<m; i++) {
3753       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3754       smycols += ourlens[i];
3755       svals   += ourlens[i];
3756       jj++;
3757     }
3758   }
3759   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3760   ierr = PetscFree(vals);CHKERRQ(ierr);
3761   ierr = PetscFree(mycols);CHKERRQ(ierr);
3762   ierr = PetscFree(rowners);CHKERRQ(ierr);
3763   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3770 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3771 {
3772   PetscErrorCode ierr;
3773   IS             iscol_local;
3774   PetscInt       csize;
3775 
3776   PetscFunctionBegin;
3777   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3778   if (call == MAT_REUSE_MATRIX) {
3779     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3780     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3781   } else {
3782     PetscInt cbs;
3783     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3784     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3785     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3786   }
3787   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3788   if (call == MAT_INITIAL_MATRIX) {
3789     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3790     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3791   }
3792   PetscFunctionReturn(0);
3793 }
3794 
3795 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3796 #undef __FUNCT__
3797 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3798 /*
3799     Not great since it makes two copies of the submatrix, first an SeqAIJ
3800   in local and then by concatenating the local matrices the end result.
3801   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3802 
3803   Note: This requires a sequential iscol with all indices.
3804 */
3805 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3806 {
3807   PetscErrorCode ierr;
3808   PetscMPIInt    rank,size;
3809   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3810   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3811   PetscBool      allcolumns, colflag;
3812   Mat            M,Mreuse;
3813   MatScalar      *vwork,*aa;
3814   MPI_Comm       comm;
3815   Mat_SeqAIJ     *aij;
3816 
3817   PetscFunctionBegin;
3818   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3819   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3820   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3821 
3822   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3823   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3824   if (colflag && ncol == mat->cmap->N) {
3825     allcolumns = PETSC_TRUE;
3826   } else {
3827     allcolumns = PETSC_FALSE;
3828   }
3829   if (call ==  MAT_REUSE_MATRIX) {
3830     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3831     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3832     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3833   } else {
3834     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3835   }
3836 
3837   /*
3838       m - number of local rows
3839       n - number of columns (same on all processors)
3840       rstart - first row in new global matrix generated
3841   */
3842   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3843   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3844   if (call == MAT_INITIAL_MATRIX) {
3845     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3846     ii  = aij->i;
3847     jj  = aij->j;
3848 
3849     /*
3850         Determine the number of non-zeros in the diagonal and off-diagonal
3851         portions of the matrix in order to do correct preallocation
3852     */
3853 
3854     /* first get start and end of "diagonal" columns */
3855     if (csize == PETSC_DECIDE) {
3856       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3857       if (mglobal == n) { /* square matrix */
3858         nlocal = m;
3859       } else {
3860         nlocal = n/size + ((n % size) > rank);
3861       }
3862     } else {
3863       nlocal = csize;
3864     }
3865     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3866     rstart = rend - nlocal;
3867     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3868 
3869     /* next, compute all the lengths */
3870     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3871     olens = dlens + m;
3872     for (i=0; i<m; i++) {
3873       jend = ii[i+1] - ii[i];
3874       olen = 0;
3875       dlen = 0;
3876       for (j=0; j<jend; j++) {
3877         if (*jj < rstart || *jj >= rend) olen++;
3878         else dlen++;
3879         jj++;
3880       }
3881       olens[i] = olen;
3882       dlens[i] = dlen;
3883     }
3884     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3885     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3886     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3887     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3888     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3889     ierr = PetscFree(dlens);CHKERRQ(ierr);
3890   } else {
3891     PetscInt ml,nl;
3892 
3893     M    = *newmat;
3894     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3895     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3896     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3897     /*
3898          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3899        rather than the slower MatSetValues().
3900     */
3901     M->was_assembled = PETSC_TRUE;
3902     M->assembled     = PETSC_FALSE;
3903   }
3904   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3905   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3906   ii   = aij->i;
3907   jj   = aij->j;
3908   aa   = aij->a;
3909   for (i=0; i<m; i++) {
3910     row   = rstart + i;
3911     nz    = ii[i+1] - ii[i];
3912     cwork = jj;     jj += nz;
3913     vwork = aa;     aa += nz;
3914     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3915   }
3916 
3917   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3918   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   *newmat = M;
3920 
3921   /* save submatrix used in processor for next request */
3922   if (call ==  MAT_INITIAL_MATRIX) {
3923     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3924     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3925   }
3926   PetscFunctionReturn(0);
3927 }
3928 
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3931 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3932 {
3933   PetscInt       m,cstart, cend,j,nnz,i,d;
3934   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3935   const PetscInt *JJ;
3936   PetscScalar    *values;
3937   PetscErrorCode ierr;
3938 
3939   PetscFunctionBegin;
3940   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3941 
3942   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3943   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3944   m      = B->rmap->n;
3945   cstart = B->cmap->rstart;
3946   cend   = B->cmap->rend;
3947   rstart = B->rmap->rstart;
3948 
3949   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3950 
3951 #if defined(PETSC_USE_DEBUGGING)
3952   for (i=0; i<m; i++) {
3953     nnz = Ii[i+1]- Ii[i];
3954     JJ  = J + Ii[i];
3955     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3956     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3957     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3958   }
3959 #endif
3960 
3961   for (i=0; i<m; i++) {
3962     nnz     = Ii[i+1]- Ii[i];
3963     JJ      = J + Ii[i];
3964     nnz_max = PetscMax(nnz_max,nnz);
3965     d       = 0;
3966     for (j=0; j<nnz; j++) {
3967       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3968     }
3969     d_nnz[i] = d;
3970     o_nnz[i] = nnz - d;
3971   }
3972   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3973   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3974 
3975   if (v) values = (PetscScalar*)v;
3976   else {
3977     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3978     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3979   }
3980 
3981   for (i=0; i<m; i++) {
3982     ii   = i + rstart;
3983     nnz  = Ii[i+1]- Ii[i];
3984     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3985   }
3986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3988 
3989   if (!v) {
3990     ierr = PetscFree(values);CHKERRQ(ierr);
3991   }
3992   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3993   PetscFunctionReturn(0);
3994 }
3995 
3996 #undef __FUNCT__
3997 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3998 /*@
3999    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4000    (the default parallel PETSc format).
4001 
4002    Collective on MPI_Comm
4003 
4004    Input Parameters:
4005 +  B - the matrix
4006 .  i - the indices into j for the start of each local row (starts with zero)
4007 .  j - the column indices for each local row (starts with zero)
4008 -  v - optional values in the matrix
4009 
4010    Level: developer
4011 
4012    Notes:
4013        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4014      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4015      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4016 
4017        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4018 
4019        The format which is used for the sparse matrix input, is equivalent to a
4020     row-major ordering.. i.e for the following matrix, the input data expected is
4021     as shown:
4022 
4023         1 0 0
4024         2 0 3     P0
4025        -------
4026         4 5 6     P1
4027 
4028      Process0 [P0]: rows_owned=[0,1]
4029         i =  {0,1,3}  [size = nrow+1  = 2+1]
4030         j =  {0,0,2}  [size = nz = 6]
4031         v =  {1,2,3}  [size = nz = 6]
4032 
4033      Process1 [P1]: rows_owned=[2]
4034         i =  {0,3}    [size = nrow+1  = 1+1]
4035         j =  {0,1,2}  [size = nz = 6]
4036         v =  {4,5,6}  [size = nz = 6]
4037 
4038 .keywords: matrix, aij, compressed row, sparse, parallel
4039 
4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4041           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4042 @*/
4043 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4044 {
4045   PetscErrorCode ierr;
4046 
4047   PetscFunctionBegin;
4048   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4049   PetscFunctionReturn(0);
4050 }
4051 
4052 #undef __FUNCT__
4053 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4054 /*@C
4055    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4056    (the default parallel PETSc format).  For good matrix assembly performance
4057    the user should preallocate the matrix storage by setting the parameters
4058    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4059    performance can be increased by more than a factor of 50.
4060 
4061    Collective on MPI_Comm
4062 
4063    Input Parameters:
4064 +  A - the matrix
4065 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4066            (same value is used for all local rows)
4067 .  d_nnz - array containing the number of nonzeros in the various rows of the
4068            DIAGONAL portion of the local submatrix (possibly different for each row)
4069            or NULL, if d_nz is used to specify the nonzero structure.
4070            The size of this array is equal to the number of local rows, i.e 'm'.
4071            For matrices that will be factored, you must leave room for (and set)
4072            the diagonal entry even if it is zero.
4073 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4074            submatrix (same value is used for all local rows).
4075 -  o_nnz - array containing the number of nonzeros in the various rows of the
4076            OFF-DIAGONAL portion of the local submatrix (possibly different for
4077            each row) or NULL, if o_nz is used to specify the nonzero
4078            structure. The size of this array is equal to the number
4079            of local rows, i.e 'm'.
4080 
4081    If the *_nnz parameter is given then the *_nz parameter is ignored
4082 
4083    The AIJ format (also called the Yale sparse matrix format or
4084    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4085    storage.  The stored row and column indices begin with zero.
4086    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4087 
4088    The parallel matrix is partitioned such that the first m0 rows belong to
4089    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4090    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4091 
4092    The DIAGONAL portion of the local submatrix of a processor can be defined
4093    as the submatrix which is obtained by extraction the part corresponding to
4094    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4095    first row that belongs to the processor, r2 is the last row belonging to
4096    the this processor, and c1-c2 is range of indices of the local part of a
4097    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4098    common case of a square matrix, the row and column ranges are the same and
4099    the DIAGONAL part is also square. The remaining portion of the local
4100    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4101 
4102    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4103 
4104    You can call MatGetInfo() to get information on how effective the preallocation was;
4105    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4106    You can also run with the option -info and look for messages with the string
4107    malloc in them to see if additional memory allocation was needed.
4108 
4109    Example usage:
4110 
4111    Consider the following 8x8 matrix with 34 non-zero values, that is
4112    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4113    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4114    as follows:
4115 
4116 .vb
4117             1  2  0  |  0  3  0  |  0  4
4118     Proc0   0  5  6  |  7  0  0  |  8  0
4119             9  0 10  | 11  0  0  | 12  0
4120     -------------------------------------
4121            13  0 14  | 15 16 17  |  0  0
4122     Proc1   0 18  0  | 19 20 21  |  0  0
4123             0  0  0  | 22 23  0  | 24  0
4124     -------------------------------------
4125     Proc2  25 26 27  |  0  0 28  | 29  0
4126            30  0  0  | 31 32 33  |  0 34
4127 .ve
4128 
4129    This can be represented as a collection of submatrices as:
4130 
4131 .vb
4132       A B C
4133       D E F
4134       G H I
4135 .ve
4136 
4137    Where the submatrices A,B,C are owned by proc0, D,E,F are
4138    owned by proc1, G,H,I are owned by proc2.
4139 
4140    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4141    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4142    The 'M','N' parameters are 8,8, and have the same values on all procs.
4143 
4144    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4145    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4146    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4147    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4148    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4149    matrix, ans [DF] as another SeqAIJ matrix.
4150 
4151    When d_nz, o_nz parameters are specified, d_nz storage elements are
4152    allocated for every row of the local diagonal submatrix, and o_nz
4153    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4154    One way to choose d_nz and o_nz is to use the max nonzerors per local
4155    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4156    In this case, the values of d_nz,o_nz are:
4157 .vb
4158      proc0 : dnz = 2, o_nz = 2
4159      proc1 : dnz = 3, o_nz = 2
4160      proc2 : dnz = 1, o_nz = 4
4161 .ve
4162    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4163    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4164    for proc3. i.e we are using 12+15+10=37 storage locations to store
4165    34 values.
4166 
4167    When d_nnz, o_nnz parameters are specified, the storage is specified
4168    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4169    In the above case the values for d_nnz,o_nnz are:
4170 .vb
4171      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4172      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4173      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4174 .ve
4175    Here the space allocated is sum of all the above values i.e 34, and
4176    hence pre-allocation is perfect.
4177 
4178    Level: intermediate
4179 
4180 .keywords: matrix, aij, compressed row, sparse, parallel
4181 
4182 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4183           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4184 @*/
4185 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4186 {
4187   PetscErrorCode ierr;
4188 
4189   PetscFunctionBegin;
4190   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4191   PetscValidType(B,1);
4192   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4193   PetscFunctionReturn(0);
4194 }
4195 
4196 #undef __FUNCT__
4197 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4198 /*@
4199      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4200          CSR format the local rows.
4201 
4202    Collective on MPI_Comm
4203 
4204    Input Parameters:
4205 +  comm - MPI communicator
4206 .  m - number of local rows (Cannot be PETSC_DECIDE)
4207 .  n - This value should be the same as the local size used in creating the
4208        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4209        calculated if N is given) For square matrices n is almost always m.
4210 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4211 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4212 .   i - row indices
4213 .   j - column indices
4214 -   a - matrix values
4215 
4216    Output Parameter:
4217 .   mat - the matrix
4218 
4219    Level: intermediate
4220 
4221    Notes:
4222        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4223      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4224      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4225 
4226        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4227 
4228        The format which is used for the sparse matrix input, is equivalent to a
4229     row-major ordering.. i.e for the following matrix, the input data expected is
4230     as shown:
4231 
4232         1 0 0
4233         2 0 3     P0
4234        -------
4235         4 5 6     P1
4236 
4237      Process0 [P0]: rows_owned=[0,1]
4238         i =  {0,1,3}  [size = nrow+1  = 2+1]
4239         j =  {0,0,2}  [size = nz = 6]
4240         v =  {1,2,3}  [size = nz = 6]
4241 
4242      Process1 [P1]: rows_owned=[2]
4243         i =  {0,3}    [size = nrow+1  = 1+1]
4244         j =  {0,1,2}  [size = nz = 6]
4245         v =  {4,5,6}  [size = nz = 6]
4246 
4247 .keywords: matrix, aij, compressed row, sparse, parallel
4248 
4249 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4250           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4251 @*/
4252 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4253 {
4254   PetscErrorCode ierr;
4255 
4256   PetscFunctionBegin;
4257   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4258   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4259   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4260   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4261   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4262   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4263   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 #undef __FUNCT__
4268 #define __FUNCT__ "MatCreateAIJ"
4269 /*@C
4270    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4271    (the default parallel PETSc format).  For good matrix assembly performance
4272    the user should preallocate the matrix storage by setting the parameters
4273    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4274    performance can be increased by more than a factor of 50.
4275 
4276    Collective on MPI_Comm
4277 
4278    Input Parameters:
4279 +  comm - MPI communicator
4280 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4281            This value should be the same as the local size used in creating the
4282            y vector for the matrix-vector product y = Ax.
4283 .  n - This value should be the same as the local size used in creating the
4284        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4285        calculated if N is given) For square matrices n is almost always m.
4286 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4287 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4288 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4289            (same value is used for all local rows)
4290 .  d_nnz - array containing the number of nonzeros in the various rows of the
4291            DIAGONAL portion of the local submatrix (possibly different for each row)
4292            or NULL, if d_nz is used to specify the nonzero structure.
4293            The size of this array is equal to the number of local rows, i.e 'm'.
4294 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4295            submatrix (same value is used for all local rows).
4296 -  o_nnz - array containing the number of nonzeros in the various rows of the
4297            OFF-DIAGONAL portion of the local submatrix (possibly different for
4298            each row) or NULL, if o_nz is used to specify the nonzero
4299            structure. The size of this array is equal to the number
4300            of local rows, i.e 'm'.
4301 
4302    Output Parameter:
4303 .  A - the matrix
4304 
4305    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4306    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4307    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4308 
4309    Notes:
4310    If the *_nnz parameter is given then the *_nz parameter is ignored
4311 
4312    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4313    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4314    storage requirements for this matrix.
4315 
4316    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4317    processor than it must be used on all processors that share the object for
4318    that argument.
4319 
4320    The user MUST specify either the local or global matrix dimensions
4321    (possibly both).
4322 
4323    The parallel matrix is partitioned across processors such that the
4324    first m0 rows belong to process 0, the next m1 rows belong to
4325    process 1, the next m2 rows belong to process 2 etc.. where
4326    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4327    values corresponding to [m x N] submatrix.
4328 
4329    The columns are logically partitioned with the n0 columns belonging
4330    to 0th partition, the next n1 columns belonging to the next
4331    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4332 
4333    The DIAGONAL portion of the local submatrix on any given processor
4334    is the submatrix corresponding to the rows and columns m,n
4335    corresponding to the given processor. i.e diagonal matrix on
4336    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4337    etc. The remaining portion of the local submatrix [m x (N-n)]
4338    constitute the OFF-DIAGONAL portion. The example below better
4339    illustrates this concept.
4340 
4341    For a square global matrix we define each processor's diagonal portion
4342    to be its local rows and the corresponding columns (a square submatrix);
4343    each processor's off-diagonal portion encompasses the remainder of the
4344    local matrix (a rectangular submatrix).
4345 
4346    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4347 
4348    When calling this routine with a single process communicator, a matrix of
4349    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4350    type of communicator, use the construction mechanism:
4351      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4352 
4353    By default, this format uses inodes (identical nodes) when possible.
4354    We search for consecutive rows with the same nonzero structure, thereby
4355    reusing matrix information to achieve increased efficiency.
4356 
4357    Options Database Keys:
4358 +  -mat_no_inode  - Do not use inodes
4359 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4360 -  -mat_aij_oneindex - Internally use indexing starting at 1
4361         rather than 0.  Note that when calling MatSetValues(),
4362         the user still MUST index entries starting at 0!
4363 
4364 
4365    Example usage:
4366 
4367    Consider the following 8x8 matrix with 34 non-zero values, that is
4368    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4369    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4370    as follows:
4371 
4372 .vb
4373             1  2  0  |  0  3  0  |  0  4
4374     Proc0   0  5  6  |  7  0  0  |  8  0
4375             9  0 10  | 11  0  0  | 12  0
4376     -------------------------------------
4377            13  0 14  | 15 16 17  |  0  0
4378     Proc1   0 18  0  | 19 20 21  |  0  0
4379             0  0  0  | 22 23  0  | 24  0
4380     -------------------------------------
4381     Proc2  25 26 27  |  0  0 28  | 29  0
4382            30  0  0  | 31 32 33  |  0 34
4383 .ve
4384 
4385    This can be represented as a collection of submatrices as:
4386 
4387 .vb
4388       A B C
4389       D E F
4390       G H I
4391 .ve
4392 
4393    Where the submatrices A,B,C are owned by proc0, D,E,F are
4394    owned by proc1, G,H,I are owned by proc2.
4395 
4396    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4397    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4398    The 'M','N' parameters are 8,8, and have the same values on all procs.
4399 
4400    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4401    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4402    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4403    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4404    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4405    matrix, ans [DF] as another SeqAIJ matrix.
4406 
4407    When d_nz, o_nz parameters are specified, d_nz storage elements are
4408    allocated for every row of the local diagonal submatrix, and o_nz
4409    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4410    One way to choose d_nz and o_nz is to use the max nonzerors per local
4411    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4412    In this case, the values of d_nz,o_nz are:
4413 .vb
4414      proc0 : dnz = 2, o_nz = 2
4415      proc1 : dnz = 3, o_nz = 2
4416      proc2 : dnz = 1, o_nz = 4
4417 .ve
4418    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4419    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4420    for proc3. i.e we are using 12+15+10=37 storage locations to store
4421    34 values.
4422 
4423    When d_nnz, o_nnz parameters are specified, the storage is specified
4424    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4425    In the above case the values for d_nnz,o_nnz are:
4426 .vb
4427      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4428      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4429      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4430 .ve
4431    Here the space allocated is sum of all the above values i.e 34, and
4432    hence pre-allocation is perfect.
4433 
4434    Level: intermediate
4435 
4436 .keywords: matrix, aij, compressed row, sparse, parallel
4437 
4438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4439           MPIAIJ, MatCreateMPIAIJWithArrays()
4440 @*/
4441 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4442 {
4443   PetscErrorCode ierr;
4444   PetscMPIInt    size;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4448   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4449   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4450   if (size > 1) {
4451     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4452     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4453   } else {
4454     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4455     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4456   }
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 #undef __FUNCT__
4461 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4462 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4463 {
4464   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4465 
4466   PetscFunctionBegin;
4467   *Ad     = a->A;
4468   *Ao     = a->B;
4469   *colmap = a->garray;
4470   PetscFunctionReturn(0);
4471 }
4472 
4473 #undef __FUNCT__
4474 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4475 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4476 {
4477   PetscErrorCode ierr;
4478   PetscInt       i;
4479   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4480 
4481   PetscFunctionBegin;
4482   if (coloring->ctype == IS_COLORING_GLOBAL) {
4483     ISColoringValue *allcolors,*colors;
4484     ISColoring      ocoloring;
4485 
4486     /* set coloring for diagonal portion */
4487     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4488 
4489     /* set coloring for off-diagonal portion */
4490     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4491     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4492     for (i=0; i<a->B->cmap->n; i++) {
4493       colors[i] = allcolors[a->garray[i]];
4494     }
4495     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4496     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4497     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4498     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4499   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4500     ISColoringValue *colors;
4501     PetscInt        *larray;
4502     ISColoring      ocoloring;
4503 
4504     /* set coloring for diagonal portion */
4505     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4506     for (i=0; i<a->A->cmap->n; i++) {
4507       larray[i] = i + A->cmap->rstart;
4508     }
4509     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4510     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4511     for (i=0; i<a->A->cmap->n; i++) {
4512       colors[i] = coloring->colors[larray[i]];
4513     }
4514     ierr = PetscFree(larray);CHKERRQ(ierr);
4515     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4516     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4517     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4518 
4519     /* set coloring for off-diagonal portion */
4520     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4521     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4522     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4523     for (i=0; i<a->B->cmap->n; i++) {
4524       colors[i] = coloring->colors[larray[i]];
4525     }
4526     ierr = PetscFree(larray);CHKERRQ(ierr);
4527     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4528     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4529     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4530   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 #undef __FUNCT__
4535 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4536 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4537 {
4538   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4539   PetscErrorCode ierr;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4543   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4544   PetscFunctionReturn(0);
4545 }
4546 
4547 #undef __FUNCT__
4548 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4549 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4550 {
4551   PetscErrorCode ierr;
4552   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4553   PetscInt       *indx;
4554 
4555   PetscFunctionBegin;
4556   /* This routine will ONLY return MPIAIJ type matrix */
4557   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4558   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4559   if (n == PETSC_DECIDE) {
4560     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4561   }
4562   /* Check sum(n) = N */
4563   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4564   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4565 
4566   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4567   rstart -= m;
4568 
4569   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4570   for (i=0; i<m; i++) {
4571     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4572     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4573     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4574   }
4575 
4576   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4577   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4578   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4579   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4580   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4581   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4587 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4588 {
4589   PetscErrorCode ierr;
4590   PetscInt       m,N,i,rstart,nnz,Ii;
4591   PetscInt       *indx;
4592   PetscScalar    *values;
4593 
4594   PetscFunctionBegin;
4595   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4596   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4597   for (i=0; i<m; i++) {
4598     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4599     Ii   = i + rstart;
4600     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4601     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4602   }
4603   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4604   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4605   PetscFunctionReturn(0);
4606 }
4607 
4608 #undef __FUNCT__
4609 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4610 /*@
4611       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4612                  matrices from each processor
4613 
4614     Collective on MPI_Comm
4615 
4616    Input Parameters:
4617 +    comm - the communicators the parallel matrix will live on
4618 .    inmat - the input sequential matrices
4619 .    n - number of local columns (or PETSC_DECIDE)
4620 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4621 
4622    Output Parameter:
4623 .    outmat - the parallel matrix generated
4624 
4625     Level: advanced
4626 
4627    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4628 
4629 @*/
4630 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4631 {
4632   PetscErrorCode ierr;
4633 
4634   PetscFunctionBegin;
4635   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4636   if (scall == MAT_INITIAL_MATRIX) {
4637     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4638   }
4639   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4640   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 #undef __FUNCT__
4645 #define __FUNCT__ "MatFileSplit"
4646 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4647 {
4648   PetscErrorCode    ierr;
4649   PetscMPIInt       rank;
4650   PetscInt          m,N,i,rstart,nnz;
4651   size_t            len;
4652   const PetscInt    *indx;
4653   PetscViewer       out;
4654   char              *name;
4655   Mat               B;
4656   const PetscScalar *values;
4657 
4658   PetscFunctionBegin;
4659   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4660   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4661   /* Should this be the type of the diagonal block of A? */
4662   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4663   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4664   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4665   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4666   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4667   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4668   for (i=0; i<m; i++) {
4669     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4670     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4671     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4672   }
4673   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675 
4676   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4677   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4678   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4679   sprintf(name,"%s.%d",outfile,rank);
4680   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4681   ierr = PetscFree(name);CHKERRQ(ierr);
4682   ierr = MatView(B,out);CHKERRQ(ierr);
4683   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4684   ierr = MatDestroy(&B);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4689 #undef __FUNCT__
4690 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4691 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4692 {
4693   PetscErrorCode      ierr;
4694   Mat_Merge_SeqsToMPI *merge;
4695   PetscContainer      container;
4696 
4697   PetscFunctionBegin;
4698   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4699   if (container) {
4700     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4701     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4702     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4703     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4704     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4705     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4706     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4707     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4708     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4709     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4710     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4711     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4712     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4713     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4714     ierr = PetscFree(merge);CHKERRQ(ierr);
4715     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4716   }
4717   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4718   PetscFunctionReturn(0);
4719 }
4720 
4721 #include <../src/mat/utils/freespace.h>
4722 #include <petscbt.h>
4723 
4724 #undef __FUNCT__
4725 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4726 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4727 {
4728   PetscErrorCode      ierr;
4729   MPI_Comm            comm;
4730   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4731   PetscMPIInt         size,rank,taga,*len_s;
4732   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4733   PetscInt            proc,m;
4734   PetscInt            **buf_ri,**buf_rj;
4735   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4736   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4737   MPI_Request         *s_waits,*r_waits;
4738   MPI_Status          *status;
4739   MatScalar           *aa=a->a;
4740   MatScalar           **abuf_r,*ba_i;
4741   Mat_Merge_SeqsToMPI *merge;
4742   PetscContainer      container;
4743 
4744   PetscFunctionBegin;
4745   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4746   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4747 
4748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4749   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4750 
4751   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4752   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4753 
4754   bi     = merge->bi;
4755   bj     = merge->bj;
4756   buf_ri = merge->buf_ri;
4757   buf_rj = merge->buf_rj;
4758 
4759   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4760   owners = merge->rowmap->range;
4761   len_s  = merge->len_s;
4762 
4763   /* send and recv matrix values */
4764   /*-----------------------------*/
4765   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4766   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4767 
4768   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4769   for (proc=0,k=0; proc<size; proc++) {
4770     if (!len_s[proc]) continue;
4771     i    = owners[proc];
4772     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4773     k++;
4774   }
4775 
4776   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4777   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4778   ierr = PetscFree(status);CHKERRQ(ierr);
4779 
4780   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4781   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4782 
4783   /* insert mat values of mpimat */
4784   /*----------------------------*/
4785   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4786   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4787 
4788   for (k=0; k<merge->nrecv; k++) {
4789     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4790     nrows       = *(buf_ri_k[k]);
4791     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4792     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4793   }
4794 
4795   /* set values of ba */
4796   m = merge->rowmap->n;
4797   for (i=0; i<m; i++) {
4798     arow = owners[rank] + i;
4799     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4800     bnzi = bi[i+1] - bi[i];
4801     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4802 
4803     /* add local non-zero vals of this proc's seqmat into ba */
4804     anzi   = ai[arow+1] - ai[arow];
4805     aj     = a->j + ai[arow];
4806     aa     = a->a + ai[arow];
4807     nextaj = 0;
4808     for (j=0; nextaj<anzi; j++) {
4809       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4810         ba_i[j] += aa[nextaj++];
4811       }
4812     }
4813 
4814     /* add received vals into ba */
4815     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4816       /* i-th row */
4817       if (i == *nextrow[k]) {
4818         anzi   = *(nextai[k]+1) - *nextai[k];
4819         aj     = buf_rj[k] + *(nextai[k]);
4820         aa     = abuf_r[k] + *(nextai[k]);
4821         nextaj = 0;
4822         for (j=0; nextaj<anzi; j++) {
4823           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4824             ba_i[j] += aa[nextaj++];
4825           }
4826         }
4827         nextrow[k]++; nextai[k]++;
4828       }
4829     }
4830     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4831   }
4832   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4833   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4834 
4835   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4836   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4837   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4838   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4839   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4840   PetscFunctionReturn(0);
4841 }
4842 
4843 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4844 
4845 #undef __FUNCT__
4846 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4847 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4848 {
4849   PetscErrorCode      ierr;
4850   Mat                 B_mpi;
4851   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4852   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4853   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4854   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4855   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4856   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4857   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4858   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4859   MPI_Status          *status;
4860   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4861   PetscBT             lnkbt;
4862   Mat_Merge_SeqsToMPI *merge;
4863   PetscContainer      container;
4864 
4865   PetscFunctionBegin;
4866   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4867 
4868   /* make sure it is a PETSc comm */
4869   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4871   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4872 
4873   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4874   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4875 
4876   /* determine row ownership */
4877   /*---------------------------------------------------------*/
4878   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4879   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4880   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4881   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4882   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4883   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4884   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4885 
4886   m      = merge->rowmap->n;
4887   owners = merge->rowmap->range;
4888 
4889   /* determine the number of messages to send, their lengths */
4890   /*---------------------------------------------------------*/
4891   len_s = merge->len_s;
4892 
4893   len          = 0; /* length of buf_si[] */
4894   merge->nsend = 0;
4895   for (proc=0; proc<size; proc++) {
4896     len_si[proc] = 0;
4897     if (proc == rank) {
4898       len_s[proc] = 0;
4899     } else {
4900       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4901       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4902     }
4903     if (len_s[proc]) {
4904       merge->nsend++;
4905       nrows = 0;
4906       for (i=owners[proc]; i<owners[proc+1]; i++) {
4907         if (ai[i+1] > ai[i]) nrows++;
4908       }
4909       len_si[proc] = 2*(nrows+1);
4910       len         += len_si[proc];
4911     }
4912   }
4913 
4914   /* determine the number and length of messages to receive for ij-structure */
4915   /*-------------------------------------------------------------------------*/
4916   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4917   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4918 
4919   /* post the Irecv of j-structure */
4920   /*-------------------------------*/
4921   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4922   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4923 
4924   /* post the Isend of j-structure */
4925   /*--------------------------------*/
4926   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4927 
4928   for (proc=0, k=0; proc<size; proc++) {
4929     if (!len_s[proc]) continue;
4930     i    = owners[proc];
4931     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4932     k++;
4933   }
4934 
4935   /* receives and sends of j-structure are complete */
4936   /*------------------------------------------------*/
4937   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4938   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4939 
4940   /* send and recv i-structure */
4941   /*---------------------------*/
4942   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4943   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4944 
4945   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4946   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4947   for (proc=0,k=0; proc<size; proc++) {
4948     if (!len_s[proc]) continue;
4949     /* form outgoing message for i-structure:
4950          buf_si[0]:                 nrows to be sent
4951                [1:nrows]:           row index (global)
4952                [nrows+1:2*nrows+1]: i-structure index
4953     */
4954     /*-------------------------------------------*/
4955     nrows       = len_si[proc]/2 - 1;
4956     buf_si_i    = buf_si + nrows+1;
4957     buf_si[0]   = nrows;
4958     buf_si_i[0] = 0;
4959     nrows       = 0;
4960     for (i=owners[proc]; i<owners[proc+1]; i++) {
4961       anzi = ai[i+1] - ai[i];
4962       if (anzi) {
4963         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4964         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4965         nrows++;
4966       }
4967     }
4968     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4969     k++;
4970     buf_si += len_si[proc];
4971   }
4972 
4973   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4974   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4975 
4976   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4977   for (i=0; i<merge->nrecv; i++) {
4978     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4979   }
4980 
4981   ierr = PetscFree(len_si);CHKERRQ(ierr);
4982   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4983   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4984   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4985   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4986   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4987   ierr = PetscFree(status);CHKERRQ(ierr);
4988 
4989   /* compute a local seq matrix in each processor */
4990   /*----------------------------------------------*/
4991   /* allocate bi array and free space for accumulating nonzero column info */
4992   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4993   bi[0] = 0;
4994 
4995   /* create and initialize a linked list */
4996   nlnk = N+1;
4997   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4998 
4999   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5000   len  = ai[owners[rank+1]] - ai[owners[rank]];
5001   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5002 
5003   current_space = free_space;
5004 
5005   /* determine symbolic info for each local row */
5006   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5007 
5008   for (k=0; k<merge->nrecv; k++) {
5009     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5010     nrows       = *buf_ri_k[k];
5011     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5012     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5013   }
5014 
5015   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5016   len  = 0;
5017   for (i=0; i<m; i++) {
5018     bnzi = 0;
5019     /* add local non-zero cols of this proc's seqmat into lnk */
5020     arow  = owners[rank] + i;
5021     anzi  = ai[arow+1] - ai[arow];
5022     aj    = a->j + ai[arow];
5023     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5024     bnzi += nlnk;
5025     /* add received col data into lnk */
5026     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5027       if (i == *nextrow[k]) { /* i-th row */
5028         anzi  = *(nextai[k]+1) - *nextai[k];
5029         aj    = buf_rj[k] + *nextai[k];
5030         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5031         bnzi += nlnk;
5032         nextrow[k]++; nextai[k]++;
5033       }
5034     }
5035     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5036 
5037     /* if free space is not available, make more free space */
5038     if (current_space->local_remaining<bnzi) {
5039       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5040       nspacedouble++;
5041     }
5042     /* copy data into free space, then initialize lnk */
5043     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5044     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5045 
5046     current_space->array           += bnzi;
5047     current_space->local_used      += bnzi;
5048     current_space->local_remaining -= bnzi;
5049 
5050     bi[i+1] = bi[i] + bnzi;
5051   }
5052 
5053   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5054 
5055   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5056   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5057   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5058 
5059   /* create symbolic parallel matrix B_mpi */
5060   /*---------------------------------------*/
5061   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5062   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5063   if (n==PETSC_DECIDE) {
5064     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5065   } else {
5066     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5067   }
5068   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5069   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5070   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5071   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5072   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5073 
5074   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5075   B_mpi->assembled    = PETSC_FALSE;
5076   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5077   merge->bi           = bi;
5078   merge->bj           = bj;
5079   merge->buf_ri       = buf_ri;
5080   merge->buf_rj       = buf_rj;
5081   merge->coi          = NULL;
5082   merge->coj          = NULL;
5083   merge->owners_co    = NULL;
5084 
5085   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5086 
5087   /* attach the supporting struct to B_mpi for reuse */
5088   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5089   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5090   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5091   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5092   *mpimat = B_mpi;
5093 
5094   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
5097 
5098 #undef __FUNCT__
5099 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5100 /*@C
5101       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5102                  matrices from each processor
5103 
5104     Collective on MPI_Comm
5105 
5106    Input Parameters:
5107 +    comm - the communicators the parallel matrix will live on
5108 .    seqmat - the input sequential matrices
5109 .    m - number of local rows (or PETSC_DECIDE)
5110 .    n - number of local columns (or PETSC_DECIDE)
5111 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5112 
5113    Output Parameter:
5114 .    mpimat - the parallel matrix generated
5115 
5116     Level: advanced
5117 
5118    Notes:
5119      The dimensions of the sequential matrix in each processor MUST be the same.
5120      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5121      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5122 @*/
5123 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5124 {
5125   PetscErrorCode ierr;
5126   PetscMPIInt    size;
5127 
5128   PetscFunctionBegin;
5129   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5130   if (size == 1) {
5131     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5132     if (scall == MAT_INITIAL_MATRIX) {
5133       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5134     } else {
5135       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5136     }
5137     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5138     PetscFunctionReturn(0);
5139   }
5140   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5141   if (scall == MAT_INITIAL_MATRIX) {
5142     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5143   }
5144   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5145   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 #undef __FUNCT__
5150 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5151 /*@
5152      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5153           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5154           with MatGetSize()
5155 
5156     Not Collective
5157 
5158    Input Parameters:
5159 +    A - the matrix
5160 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5161 
5162    Output Parameter:
5163 .    A_loc - the local sequential matrix generated
5164 
5165     Level: developer
5166 
5167 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5168 
5169 @*/
5170 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5171 {
5172   PetscErrorCode ierr;
5173   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5174   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5175   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5176   MatScalar      *aa=a->a,*ba=b->a,*cam;
5177   PetscScalar    *ca;
5178   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5179   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5180   PetscBool      match;
5181 
5182   PetscFunctionBegin;
5183   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5184   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5185   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5186   if (scall == MAT_INITIAL_MATRIX) {
5187     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5188     ci[0] = 0;
5189     for (i=0; i<am; i++) {
5190       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5191     }
5192     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5193     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5194     k    = 0;
5195     for (i=0; i<am; i++) {
5196       ncols_o = bi[i+1] - bi[i];
5197       ncols_d = ai[i+1] - ai[i];
5198       /* off-diagonal portion of A */
5199       for (jo=0; jo<ncols_o; jo++) {
5200         col = cmap[*bj];
5201         if (col >= cstart) break;
5202         cj[k]   = col; bj++;
5203         ca[k++] = *ba++;
5204       }
5205       /* diagonal portion of A */
5206       for (j=0; j<ncols_d; j++) {
5207         cj[k]   = cstart + *aj++;
5208         ca[k++] = *aa++;
5209       }
5210       /* off-diagonal portion of A */
5211       for (j=jo; j<ncols_o; j++) {
5212         cj[k]   = cmap[*bj++];
5213         ca[k++] = *ba++;
5214       }
5215     }
5216     /* put together the new matrix */
5217     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5218     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5219     /* Since these are PETSc arrays, change flags to free them as necessary. */
5220     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5221     mat->free_a  = PETSC_TRUE;
5222     mat->free_ij = PETSC_TRUE;
5223     mat->nonew   = 0;
5224   } else if (scall == MAT_REUSE_MATRIX) {
5225     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5226     ci = mat->i; cj = mat->j; cam = mat->a;
5227     for (i=0; i<am; i++) {
5228       /* off-diagonal portion of A */
5229       ncols_o = bi[i+1] - bi[i];
5230       for (jo=0; jo<ncols_o; jo++) {
5231         col = cmap[*bj];
5232         if (col >= cstart) break;
5233         *cam++ = *ba++; bj++;
5234       }
5235       /* diagonal portion of A */
5236       ncols_d = ai[i+1] - ai[i];
5237       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5238       /* off-diagonal portion of A */
5239       for (j=jo; j<ncols_o; j++) {
5240         *cam++ = *ba++; bj++;
5241       }
5242     }
5243   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5244   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5245   PetscFunctionReturn(0);
5246 }
5247 
5248 #undef __FUNCT__
5249 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5250 /*@C
5251      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5252 
5253     Not Collective
5254 
5255    Input Parameters:
5256 +    A - the matrix
5257 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5258 -    row, col - index sets of rows and columns to extract (or NULL)
5259 
5260    Output Parameter:
5261 .    A_loc - the local sequential matrix generated
5262 
5263     Level: developer
5264 
5265 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5266 
5267 @*/
5268 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5269 {
5270   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5271   PetscErrorCode ierr;
5272   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5273   IS             isrowa,iscola;
5274   Mat            *aloc;
5275   PetscBool      match;
5276 
5277   PetscFunctionBegin;
5278   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5279   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5280   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5281   if (!row) {
5282     start = A->rmap->rstart; end = A->rmap->rend;
5283     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5284   } else {
5285     isrowa = *row;
5286   }
5287   if (!col) {
5288     start = A->cmap->rstart;
5289     cmap  = a->garray;
5290     nzA   = a->A->cmap->n;
5291     nzB   = a->B->cmap->n;
5292     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5293     ncols = 0;
5294     for (i=0; i<nzB; i++) {
5295       if (cmap[i] < start) idx[ncols++] = cmap[i];
5296       else break;
5297     }
5298     imark = i;
5299     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5300     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5301     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5302   } else {
5303     iscola = *col;
5304   }
5305   if (scall != MAT_INITIAL_MATRIX) {
5306     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5307     aloc[0] = *A_loc;
5308   }
5309   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5310   *A_loc = aloc[0];
5311   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5312   if (!row) {
5313     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5314   }
5315   if (!col) {
5316     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5317   }
5318   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 #undef __FUNCT__
5323 #define __FUNCT__ "MatGetBrowsOfAcols"
5324 /*@C
5325     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5326 
5327     Collective on Mat
5328 
5329    Input Parameters:
5330 +    A,B - the matrices in mpiaij format
5331 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5332 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5333 
5334    Output Parameter:
5335 +    rowb, colb - index sets of rows and columns of B to extract
5336 -    B_seq - the sequential matrix generated
5337 
5338     Level: developer
5339 
5340 @*/
5341 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5342 {
5343   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5344   PetscErrorCode ierr;
5345   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5346   IS             isrowb,iscolb;
5347   Mat            *bseq=NULL;
5348 
5349   PetscFunctionBegin;
5350   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5351     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5352   }
5353   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5354 
5355   if (scall == MAT_INITIAL_MATRIX) {
5356     start = A->cmap->rstart;
5357     cmap  = a->garray;
5358     nzA   = a->A->cmap->n;
5359     nzB   = a->B->cmap->n;
5360     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5361     ncols = 0;
5362     for (i=0; i<nzB; i++) {  /* row < local row index */
5363       if (cmap[i] < start) idx[ncols++] = cmap[i];
5364       else break;
5365     }
5366     imark = i;
5367     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5368     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5369     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5370     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5371   } else {
5372     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5373     isrowb  = *rowb; iscolb = *colb;
5374     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5375     bseq[0] = *B_seq;
5376   }
5377   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5378   *B_seq = bseq[0];
5379   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5380   if (!rowb) {
5381     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5382   } else {
5383     *rowb = isrowb;
5384   }
5385   if (!colb) {
5386     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5387   } else {
5388     *colb = iscolb;
5389   }
5390   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 #undef __FUNCT__
5395 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5396 /*
5397     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5398     of the OFF-DIAGONAL portion of local A
5399 
5400     Collective on Mat
5401 
5402    Input Parameters:
5403 +    A,B - the matrices in mpiaij format
5404 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5405 
5406    Output Parameter:
5407 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5408 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5409 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5410 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5411 
5412     Level: developer
5413 
5414 */
5415 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5416 {
5417   VecScatter_MPI_General *gen_to,*gen_from;
5418   PetscErrorCode         ierr;
5419   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5420   Mat_SeqAIJ             *b_oth;
5421   VecScatter             ctx =a->Mvctx;
5422   MPI_Comm               comm;
5423   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5424   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5425   PetscScalar            *rvalues,*svalues;
5426   MatScalar              *b_otha,*bufa,*bufA;
5427   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5428   MPI_Request            *rwaits = NULL,*swaits = NULL;
5429   MPI_Status             *sstatus,rstatus;
5430   PetscMPIInt            jj;
5431   PetscInt               *cols,sbs,rbs;
5432   PetscScalar            *vals;
5433 
5434   PetscFunctionBegin;
5435   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5436   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5437     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5438   }
5439   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5440   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5441 
5442   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5443   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5444   rvalues  = gen_from->values; /* holds the length of receiving row */
5445   svalues  = gen_to->values;   /* holds the length of sending row */
5446   nrecvs   = gen_from->n;
5447   nsends   = gen_to->n;
5448 
5449   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5450   srow    = gen_to->indices;    /* local row index to be sent */
5451   sstarts = gen_to->starts;
5452   sprocs  = gen_to->procs;
5453   sstatus = gen_to->sstatus;
5454   sbs     = gen_to->bs;
5455   rstarts = gen_from->starts;
5456   rprocs  = gen_from->procs;
5457   rbs     = gen_from->bs;
5458 
5459   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5460   if (scall == MAT_INITIAL_MATRIX) {
5461     /* i-array */
5462     /*---------*/
5463     /*  post receives */
5464     for (i=0; i<nrecvs; i++) {
5465       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5466       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5467       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5468     }
5469 
5470     /* pack the outgoing message */
5471     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5472 
5473     sstartsj[0] = 0;
5474     rstartsj[0] = 0;
5475     len         = 0; /* total length of j or a array to be sent */
5476     k           = 0;
5477     for (i=0; i<nsends; i++) {
5478       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5479       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5480       for (j=0; j<nrows; j++) {
5481         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5482         for (l=0; l<sbs; l++) {
5483           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5484 
5485           rowlen[j*sbs+l] = ncols;
5486 
5487           len += ncols;
5488           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5489         }
5490         k++;
5491       }
5492       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5493 
5494       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5495     }
5496     /* recvs and sends of i-array are completed */
5497     i = nrecvs;
5498     while (i--) {
5499       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5500     }
5501     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5502 
5503     /* allocate buffers for sending j and a arrays */
5504     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5505     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5506 
5507     /* create i-array of B_oth */
5508     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5509 
5510     b_othi[0] = 0;
5511     len       = 0; /* total length of j or a array to be received */
5512     k         = 0;
5513     for (i=0; i<nrecvs; i++) {
5514       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5515       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5516       for (j=0; j<nrows; j++) {
5517         b_othi[k+1] = b_othi[k] + rowlen[j];
5518         len        += rowlen[j]; k++;
5519       }
5520       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5521     }
5522 
5523     /* allocate space for j and a arrrays of B_oth */
5524     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5525     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5526 
5527     /* j-array */
5528     /*---------*/
5529     /*  post receives of j-array */
5530     for (i=0; i<nrecvs; i++) {
5531       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5532       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5533     }
5534 
5535     /* pack the outgoing message j-array */
5536     k = 0;
5537     for (i=0; i<nsends; i++) {
5538       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5539       bufJ  = bufj+sstartsj[i];
5540       for (j=0; j<nrows; j++) {
5541         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5542         for (ll=0; ll<sbs; ll++) {
5543           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5544           for (l=0; l<ncols; l++) {
5545             *bufJ++ = cols[l];
5546           }
5547           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5548         }
5549       }
5550       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5551     }
5552 
5553     /* recvs and sends of j-array are completed */
5554     i = nrecvs;
5555     while (i--) {
5556       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5557     }
5558     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5559   } else if (scall == MAT_REUSE_MATRIX) {
5560     sstartsj = *startsj_s;
5561     rstartsj = *startsj_r;
5562     bufa     = *bufa_ptr;
5563     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5564     b_otha   = b_oth->a;
5565   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5566 
5567   /* a-array */
5568   /*---------*/
5569   /*  post receives of a-array */
5570   for (i=0; i<nrecvs; i++) {
5571     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5572     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5573   }
5574 
5575   /* pack the outgoing message a-array */
5576   k = 0;
5577   for (i=0; i<nsends; i++) {
5578     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5579     bufA  = bufa+sstartsj[i];
5580     for (j=0; j<nrows; j++) {
5581       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5582       for (ll=0; ll<sbs; ll++) {
5583         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5584         for (l=0; l<ncols; l++) {
5585           *bufA++ = vals[l];
5586         }
5587         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5588       }
5589     }
5590     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5591   }
5592   /* recvs and sends of a-array are completed */
5593   i = nrecvs;
5594   while (i--) {
5595     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5596   }
5597   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5598   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5599 
5600   if (scall == MAT_INITIAL_MATRIX) {
5601     /* put together the new matrix */
5602     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5603 
5604     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5605     /* Since these are PETSc arrays, change flags to free them as necessary. */
5606     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5607     b_oth->free_a  = PETSC_TRUE;
5608     b_oth->free_ij = PETSC_TRUE;
5609     b_oth->nonew   = 0;
5610 
5611     ierr = PetscFree(bufj);CHKERRQ(ierr);
5612     if (!startsj_s || !bufa_ptr) {
5613       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5614       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5615     } else {
5616       *startsj_s = sstartsj;
5617       *startsj_r = rstartsj;
5618       *bufa_ptr  = bufa;
5619     }
5620   }
5621   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5622   PetscFunctionReturn(0);
5623 }
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "MatGetCommunicationStructs"
5627 /*@C
5628   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5629 
5630   Not Collective
5631 
5632   Input Parameters:
5633 . A - The matrix in mpiaij format
5634 
5635   Output Parameter:
5636 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5637 . colmap - A map from global column index to local index into lvec
5638 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5639 
5640   Level: developer
5641 
5642 @*/
5643 #if defined(PETSC_USE_CTABLE)
5644 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5645 #else
5646 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5647 #endif
5648 {
5649   Mat_MPIAIJ *a;
5650 
5651   PetscFunctionBegin;
5652   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5653   PetscValidPointer(lvec, 2);
5654   PetscValidPointer(colmap, 3);
5655   PetscValidPointer(multScatter, 4);
5656   a = (Mat_MPIAIJ*) A->data;
5657   if (lvec) *lvec = a->lvec;
5658   if (colmap) *colmap = a->colmap;
5659   if (multScatter) *multScatter = a->Mvctx;
5660   PetscFunctionReturn(0);
5661 }
5662 
5663 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5664 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5665 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5666 
5667 #undef __FUNCT__
5668 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5669 /*
5670     Computes (B'*A')' since computing B*A directly is untenable
5671 
5672                n                       p                          p
5673         (              )       (              )         (                  )
5674       m (      A       )  *  n (       B      )   =   m (         C        )
5675         (              )       (              )         (                  )
5676 
5677 */
5678 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5679 {
5680   PetscErrorCode ierr;
5681   Mat            At,Bt,Ct;
5682 
5683   PetscFunctionBegin;
5684   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5685   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5686   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5687   ierr = MatDestroy(&At);CHKERRQ(ierr);
5688   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5689   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5690   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5691   PetscFunctionReturn(0);
5692 }
5693 
5694 #undef __FUNCT__
5695 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5696 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5697 {
5698   PetscErrorCode ierr;
5699   PetscInt       m=A->rmap->n,n=B->cmap->n;
5700   Mat            Cmat;
5701 
5702   PetscFunctionBegin;
5703   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5704   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5705   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5706   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5707   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5708   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5709   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5710   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5711 
5712   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5713 
5714   *C = Cmat;
5715   PetscFunctionReturn(0);
5716 }
5717 
5718 /* ----------------------------------------------------------------*/
5719 #undef __FUNCT__
5720 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5721 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5722 {
5723   PetscErrorCode ierr;
5724 
5725   PetscFunctionBegin;
5726   if (scall == MAT_INITIAL_MATRIX) {
5727     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5728     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5729     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5730   }
5731   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5732   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5733   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5734   PetscFunctionReturn(0);
5735 }
5736 
5737 #if defined(PETSC_HAVE_MUMPS)
5738 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5739 #endif
5740 #if defined(PETSC_HAVE_PASTIX)
5741 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5742 #endif
5743 #if defined(PETSC_HAVE_SUPERLU_DIST)
5744 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5745 #endif
5746 #if defined(PETSC_HAVE_CLIQUE)
5747 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5748 #endif
5749 
5750 /*MC
5751    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5752 
5753    Options Database Keys:
5754 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5755 
5756   Level: beginner
5757 
5758 .seealso: MatCreateAIJ()
5759 M*/
5760 
5761 #undef __FUNCT__
5762 #define __FUNCT__ "MatCreate_MPIAIJ"
5763 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5764 {
5765   Mat_MPIAIJ     *b;
5766   PetscErrorCode ierr;
5767   PetscMPIInt    size;
5768 
5769   PetscFunctionBegin;
5770   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5771 
5772   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5773   B->data       = (void*)b;
5774   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5775   B->assembled  = PETSC_FALSE;
5776   B->insertmode = NOT_SET_VALUES;
5777   b->size       = size;
5778 
5779   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5780 
5781   /* build cache for off array entries formed */
5782   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5783 
5784   b->donotstash  = PETSC_FALSE;
5785   b->colmap      = 0;
5786   b->garray      = 0;
5787   b->roworiented = PETSC_TRUE;
5788 
5789   /* stuff used for matrix vector multiply */
5790   b->lvec  = NULL;
5791   b->Mvctx = NULL;
5792 
5793   /* stuff for MatGetRow() */
5794   b->rowindices   = 0;
5795   b->rowvalues    = 0;
5796   b->getrowactive = PETSC_FALSE;
5797 
5798   /* flexible pointer used in CUSP/CUSPARSE classes */
5799   b->spptr = NULL;
5800 
5801 #if defined(PETSC_HAVE_MUMPS)
5802   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5803 #endif
5804 #if defined(PETSC_HAVE_PASTIX)
5805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5806 #endif
5807 #if defined(PETSC_HAVE_SUPERLU_DIST)
5808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5809 #endif
5810 #if defined(PETSC_HAVE_CLIQUE)
5811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5812 #endif
5813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5818   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5823   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5825   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5826   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5827   PetscFunctionReturn(0);
5828 }
5829 
5830 #undef __FUNCT__
5831 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5832 /*@
5833      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5834          and "off-diagonal" part of the matrix in CSR format.
5835 
5836    Collective on MPI_Comm
5837 
5838    Input Parameters:
5839 +  comm - MPI communicator
5840 .  m - number of local rows (Cannot be PETSC_DECIDE)
5841 .  n - This value should be the same as the local size used in creating the
5842        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5843        calculated if N is given) For square matrices n is almost always m.
5844 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5845 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5846 .   i - row indices for "diagonal" portion of matrix
5847 .   j - column indices
5848 .   a - matrix values
5849 .   oi - row indices for "off-diagonal" portion of matrix
5850 .   oj - column indices
5851 -   oa - matrix values
5852 
5853    Output Parameter:
5854 .   mat - the matrix
5855 
5856    Level: advanced
5857 
5858    Notes:
5859        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5860        must free the arrays once the matrix has been destroyed and not before.
5861 
5862        The i and j indices are 0 based
5863 
5864        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5865 
5866        This sets local rows and cannot be used to set off-processor values.
5867 
5868        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5869        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5870        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5871        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5872        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5873        communication if it is known that only local entries will be set.
5874 
5875 .keywords: matrix, aij, compressed row, sparse, parallel
5876 
5877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5878           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5879 @*/
5880 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5881 {
5882   PetscErrorCode ierr;
5883   Mat_MPIAIJ     *maij;
5884 
5885   PetscFunctionBegin;
5886   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5887   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5888   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5889   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5890   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5891   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5892   maij = (Mat_MPIAIJ*) (*mat)->data;
5893 
5894   (*mat)->preallocated = PETSC_TRUE;
5895 
5896   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5897   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5898 
5899   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5900   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5901 
5902   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5903   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5904   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5905   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5906 
5907   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5908   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5909   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5910   PetscFunctionReturn(0);
5911 }
5912 
5913 /*
5914     Special version for direct calls from Fortran
5915 */
5916 #include <petsc-private/fortranimpl.h>
5917 
5918 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5919 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5920 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5921 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5922 #endif
5923 
5924 /* Change these macros so can be used in void function */
5925 #undef CHKERRQ
5926 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5927 #undef SETERRQ2
5928 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5929 #undef SETERRQ3
5930 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5931 #undef SETERRQ
5932 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5933 
5934 #undef __FUNCT__
5935 #define __FUNCT__ "matsetvaluesmpiaij_"
5936 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5937 {
5938   Mat            mat  = *mmat;
5939   PetscInt       m    = *mm, n = *mn;
5940   InsertMode     addv = *maddv;
5941   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5942   PetscScalar    value;
5943   PetscErrorCode ierr;
5944 
5945   MatCheckPreallocated(mat,1);
5946   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5947 
5948 #if defined(PETSC_USE_DEBUG)
5949   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5950 #endif
5951   {
5952     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5953     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5954     PetscBool roworiented = aij->roworiented;
5955 
5956     /* Some Variables required in the macro */
5957     Mat        A                 = aij->A;
5958     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5959     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5960     MatScalar  *aa               = a->a;
5961     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5962     Mat        B                 = aij->B;
5963     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5964     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5965     MatScalar  *ba               = b->a;
5966 
5967     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5968     PetscInt  nonew = a->nonew;
5969     MatScalar *ap1,*ap2;
5970 
5971     PetscFunctionBegin;
5972     for (i=0; i<m; i++) {
5973       if (im[i] < 0) continue;
5974 #if defined(PETSC_USE_DEBUG)
5975       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5976 #endif
5977       if (im[i] >= rstart && im[i] < rend) {
5978         row      = im[i] - rstart;
5979         lastcol1 = -1;
5980         rp1      = aj + ai[row];
5981         ap1      = aa + ai[row];
5982         rmax1    = aimax[row];
5983         nrow1    = ailen[row];
5984         low1     = 0;
5985         high1    = nrow1;
5986         lastcol2 = -1;
5987         rp2      = bj + bi[row];
5988         ap2      = ba + bi[row];
5989         rmax2    = bimax[row];
5990         nrow2    = bilen[row];
5991         low2     = 0;
5992         high2    = nrow2;
5993 
5994         for (j=0; j<n; j++) {
5995           if (roworiented) value = v[i*n+j];
5996           else value = v[i+j*m];
5997           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5998           if (in[j] >= cstart && in[j] < cend) {
5999             col = in[j] - cstart;
6000             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6001           } else if (in[j] < 0) continue;
6002 #if defined(PETSC_USE_DEBUG)
6003           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6004 #endif
6005           else {
6006             if (mat->was_assembled) {
6007               if (!aij->colmap) {
6008                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6009               }
6010 #if defined(PETSC_USE_CTABLE)
6011               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6012               col--;
6013 #else
6014               col = aij->colmap[in[j]] - 1;
6015 #endif
6016               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6017                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6018                 col  =  in[j];
6019                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6020                 B     = aij->B;
6021                 b     = (Mat_SeqAIJ*)B->data;
6022                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6023                 rp2   = bj + bi[row];
6024                 ap2   = ba + bi[row];
6025                 rmax2 = bimax[row];
6026                 nrow2 = bilen[row];
6027                 low2  = 0;
6028                 high2 = nrow2;
6029                 bm    = aij->B->rmap->n;
6030                 ba    = b->a;
6031               }
6032             } else col = in[j];
6033             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6034           }
6035         }
6036       } else if (!aij->donotstash) {
6037         if (roworiented) {
6038           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6039         } else {
6040           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6041         }
6042       }
6043     }
6044   }
6045   PetscFunctionReturnVoid();
6046 }
6047 
6048