xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f4703a446aa471fe3417d46305cc6524dd787bd0)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     if (redund->matseq) { /* via MatGetSubMatrices()  */
2530       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2531       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2532       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2533       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2534     } else {
2535       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2537       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2538       for (i=0; i<redund->nrecvs; i++) {
2539         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2540         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2541       }
2542       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2543     }
2544 
2545     if (redund->psubcomm) {
2546       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2547     }
2548     ierr = redund->Destroy(A);CHKERRQ(ierr);
2549     ierr = PetscFree(redund);CHKERRQ(ierr);
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2557 {
2558   PetscMPIInt    rank,size;
2559   MPI_Comm       comm,subcomm=psubcomm->comm;
2560   PetscErrorCode ierr;
2561   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2562   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2563   PetscInt       *rowrange = mat->rmap->range;
2564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2565   Mat            A = aij->A,B=aij->B,C=*matredundant;
2566   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2567   PetscScalar    *sbuf_a;
2568   PetscInt       nzlocal=a->nz+b->nz;
2569   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2570   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2571   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2572   MatScalar      *aworkA,*aworkB;
2573   PetscScalar    *vals;
2574   PetscMPIInt    tag1,tag2,tag3,imdex;
2575   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2576   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2577   MPI_Status     recv_status,*send_status;
2578   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2579   PetscInt       **rbuf_j=NULL;
2580   PetscScalar    **rbuf_a=NULL;
2581   Mat_Redundant  *redund =NULL;
2582 
2583   PetscFunctionBegin;
2584   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2585   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2586   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2587   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2588   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2589 
2590   if (reuse == MAT_REUSE_MATRIX) {
2591     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2592     if (subsize == 1) {
2593       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2594       redund = c->redundant;
2595     } else {
2596       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2597       redund = c->redundant;
2598     }
2599     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2600 
2601     nsends    = redund->nsends;
2602     nrecvs    = redund->nrecvs;
2603     send_rank = redund->send_rank;
2604     recv_rank = redund->recv_rank;
2605     sbuf_nz   = redund->sbuf_nz;
2606     rbuf_nz   = redund->rbuf_nz;
2607     sbuf_j    = redund->sbuf_j;
2608     sbuf_a    = redund->sbuf_a;
2609     rbuf_j    = redund->rbuf_j;
2610     rbuf_a    = redund->rbuf_a;
2611   }
2612 
2613   if (reuse == MAT_INITIAL_MATRIX) {
2614     PetscInt    nleftover,np_subcomm;
2615 
2616     /* get the destination processors' id send_rank, nsends and nrecvs */
2617     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2618 
2619     np_subcomm = size/nsubcomm;
2620     nleftover  = size - nsubcomm*np_subcomm;
2621 
2622     nsends = 0; nrecvs = 0;
2623     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2624       /* -------------------------------------------*/
2625       for (i=0; i<size; i++) {
2626         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2627           send_rank[nsends]   = i; nsends++;
2628           recv_rank[nrecvs++] = i;
2629           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2630         }
2631       }
2632       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2633         i = size-nleftover-1;
2634         j = 0;
2635         while (j < nsubcomm - nleftover) {
2636           send_rank[nsends++] = i;
2637           i--; j++;
2638           /* printf("[%d] send to [%d]\n",rank,i); */
2639         }
2640       }
2641 
2642       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2643         for (i=0; i<nleftover; i++) {
2644           recv_rank[nrecvs++] = size-nleftover+i;
2645           /* printf("[%d] recv from [%d]\n",rank,i); */
2646         }
2647       }
2648     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2649       /* --------------------------------------------------*/
2650       PetscInt color,subcommstart;
2651       subcommstart=0;
2652       for (color=0; color<nsubcomm; color++) {
2653         if (psubcomm->color != color) {
2654           for (i=0; i<psubcomm->subsize[color]; i++) {
2655             if (subrank == i) { /* my_subrank == other's subrank */
2656               send_rank[nsends++] = subcommstart+i;
2657               recv_rank[nrecvs++] = subcommstart+i;
2658               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2659             }
2660           }
2661         }
2662         subcommstart += psubcomm->subsize[color];
2663       }
2664       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2665         subcommstart=0;
2666         for (color=0; color<nsubcomm; color++) {
2667           subcommstart += psubcomm->subsize[color];
2668           if (psubcomm->color == color) continue;
2669           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2670             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2671             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2672           }
2673         }
2674       }
2675 
2676       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2677         subcommstart=0;
2678         for (color=0; color<nsubcomm; color++) {
2679           subcommstart += psubcomm->subsize[color];
2680           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2681             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2682             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2683           }
2684         }
2685       }
2686     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2687 
2688     /* allocate sbuf_j, sbuf_a */
2689     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2690     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2691     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2692     /*
2693     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2694     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2695      */
2696   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2697 
2698   /* copy mat's local entries into the buffers */
2699   if (reuse == MAT_INITIAL_MATRIX) {
2700     rownz_max = 0;
2701     rptr      = sbuf_j;
2702     cols      = sbuf_j + rend-rstart + 1;
2703     vals      = sbuf_a;
2704     rptr[0]   = 0;
2705     for (i=0; i<rend-rstart; i++) {
2706       row    = i + rstart;
2707       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2708       ncols  = nzA + nzB;
2709       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2710       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2711       /* load the column indices for this row into cols */
2712       lwrite = 0;
2713       for (l=0; l<nzB; l++) {
2714         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2715           vals[lwrite]   = aworkB[l];
2716           cols[lwrite++] = ctmp;
2717         }
2718       }
2719       for (l=0; l<nzA; l++) {
2720         vals[lwrite]   = aworkA[l];
2721         cols[lwrite++] = cstart + cworkA[l];
2722       }
2723       for (l=0; l<nzB; l++) {
2724         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2725           vals[lwrite]   = aworkB[l];
2726           cols[lwrite++] = ctmp;
2727         }
2728       }
2729       vals     += ncols;
2730       cols     += ncols;
2731       rptr[i+1] = rptr[i] + ncols;
2732       if (rownz_max < ncols) rownz_max = ncols;
2733     }
2734     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2735   } else { /* only copy matrix values into sbuf_a */
2736     rptr    = sbuf_j;
2737     vals    = sbuf_a;
2738     rptr[0] = 0;
2739     for (i=0; i<rend-rstart; i++) {
2740       row    = i + rstart;
2741       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2742       ncols  = nzA + nzB;
2743       cworkB = b->j + b->i[i];
2744       aworkA = a->a + a->i[i];
2745       aworkB = b->a + b->i[i];
2746       lwrite = 0;
2747       for (l=0; l<nzB; l++) {
2748         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2749       }
2750       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2751       for (l=0; l<nzB; l++) {
2752         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2753       }
2754       vals     += ncols;
2755       rptr[i+1] = rptr[i] + ncols;
2756     }
2757   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2758 
2759   /* send nzlocal to others, and recv other's nzlocal */
2760   /*--------------------------------------------------*/
2761   if (reuse == MAT_INITIAL_MATRIX) {
2762     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2763 
2764     s_waits2 = s_waits3 + nsends;
2765     s_waits1 = s_waits2 + nsends;
2766     r_waits1 = s_waits1 + nsends;
2767     r_waits2 = r_waits1 + nrecvs;
2768     r_waits3 = r_waits2 + nrecvs;
2769   } else {
2770     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2771 
2772     r_waits3 = s_waits3 + nsends;
2773   }
2774 
2775   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2776   if (reuse == MAT_INITIAL_MATRIX) {
2777     /* get new tags to keep the communication clean */
2778     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2779     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2780     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2781 
2782     /* post receives of other's nzlocal */
2783     for (i=0; i<nrecvs; i++) {
2784       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2785     }
2786     /* send nzlocal to others */
2787     for (i=0; i<nsends; i++) {
2788       sbuf_nz[i] = nzlocal;
2789       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2790     }
2791     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2792     count = nrecvs;
2793     while (count) {
2794       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2795 
2796       recv_rank[imdex] = recv_status.MPI_SOURCE;
2797       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2798       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2799 
2800       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2801 
2802       rbuf_nz[imdex] += i + 2;
2803 
2804       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2805       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2806       count--;
2807     }
2808     /* wait on sends of nzlocal */
2809     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2810     /* send mat->i,j to others, and recv from other's */
2811     /*------------------------------------------------*/
2812     for (i=0; i<nsends; i++) {
2813       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2814       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2815     }
2816     /* wait on receives of mat->i,j */
2817     /*------------------------------*/
2818     count = nrecvs;
2819     while (count) {
2820       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2821       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2822       count--;
2823     }
2824     /* wait on sends of mat->i,j */
2825     /*---------------------------*/
2826     if (nsends) {
2827       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2828     }
2829   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2830 
2831   /* post receives, send and receive mat->a */
2832   /*----------------------------------------*/
2833   for (imdex=0; imdex<nrecvs; imdex++) {
2834     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2835   }
2836   for (i=0; i<nsends; i++) {
2837     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2838   }
2839   count = nrecvs;
2840   while (count) {
2841     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2842     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2843     count--;
2844   }
2845   if (nsends) {
2846     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2847   }
2848 
2849   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2850 
2851   /* create redundant matrix */
2852   /*-------------------------*/
2853   if (reuse == MAT_INITIAL_MATRIX) {
2854     const PetscInt *range;
2855     PetscInt       rstart_sub,rend_sub,mloc_sub;
2856 
2857     /* compute rownz_max for preallocation */
2858     for (imdex=0; imdex<nrecvs; imdex++) {
2859       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2860       rptr = rbuf_j[imdex];
2861       for (i=0; i<j; i++) {
2862         ncols = rptr[i+1] - rptr[i];
2863         if (rownz_max < ncols) rownz_max = ncols;
2864       }
2865     }
2866 
2867     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2868 
2869     /* get local size of redundant matrix
2870        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2871     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2872     rstart_sub = range[nsubcomm*subrank];
2873     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2874       rend_sub = range[nsubcomm*(subrank+1)];
2875     } else {
2876       rend_sub = mat->rmap->N;
2877     }
2878     mloc_sub = rend_sub - rstart_sub;
2879 
2880     if (M == N) {
2881       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2882     } else { /* non-square matrix */
2883       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2884     }
2885     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2886     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2887     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2888     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2889   } else {
2890     C = *matredundant;
2891   }
2892 
2893   /* insert local matrix entries */
2894   rptr = sbuf_j;
2895   cols = sbuf_j + rend-rstart + 1;
2896   vals = sbuf_a;
2897   for (i=0; i<rend-rstart; i++) {
2898     row   = i + rstart;
2899     ncols = rptr[i+1] - rptr[i];
2900     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2901     vals += ncols;
2902     cols += ncols;
2903   }
2904   /* insert received matrix entries */
2905   for (imdex=0; imdex<nrecvs; imdex++) {
2906     rstart = rowrange[recv_rank[imdex]];
2907     rend   = rowrange[recv_rank[imdex]+1];
2908     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2909     rptr   = rbuf_j[imdex];
2910     cols   = rbuf_j[imdex] + rend-rstart + 1;
2911     vals   = rbuf_a[imdex];
2912     for (i=0; i<rend-rstart; i++) {
2913       row   = i + rstart;
2914       ncols = rptr[i+1] - rptr[i];
2915       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2916       vals += ncols;
2917       cols += ncols;
2918     }
2919   }
2920   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2921   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2922 
2923   if (reuse == MAT_INITIAL_MATRIX) {
2924     *matredundant = C;
2925 
2926     /* create a supporting struct and attach it to C for reuse */
2927     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2928     if (subsize == 1) {
2929       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2930       c->redundant = redund;
2931     } else {
2932       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2933       c->redundant = redund;
2934     }
2935 
2936     redund->nzlocal   = nzlocal;
2937     redund->nsends    = nsends;
2938     redund->nrecvs    = nrecvs;
2939     redund->send_rank = send_rank;
2940     redund->recv_rank = recv_rank;
2941     redund->sbuf_nz   = sbuf_nz;
2942     redund->rbuf_nz   = rbuf_nz;
2943     redund->sbuf_j    = sbuf_j;
2944     redund->sbuf_a    = sbuf_a;
2945     redund->rbuf_j    = rbuf_j;
2946     redund->rbuf_a    = rbuf_a;
2947     redund->psubcomm  = NULL;
2948 
2949     redund->Destroy = C->ops->destroy;
2950     C->ops->destroy = MatDestroy_MatRedundant;
2951   }
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2957 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2958 {
2959   PetscErrorCode ierr;
2960   MPI_Comm       comm;
2961   PetscMPIInt    size,subsize;
2962   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2963   Mat_Redundant  *redund =NULL;
2964   PetscSubcomm   psubcomm_in=psubcomm;
2965   MPI_Comm       subcomm_in=subcomm;
2966 
2967   PetscFunctionBegin;
2968   if (subcomm_in == MPI_COMM_NULL) { /* use psubcomm */
2969     if (reuse ==  MAT_INITIAL_MATRIX) {
2970       if (psubcomm_in == NULL) { /* user does not provide psubcomm, create it here */
2971         ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2972         ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2973         ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2974         ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2975         ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2976         ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2977       }
2978       subcomm = psubcomm->comm;
2979       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2980     } else { /* retrieve psubcomm */
2981       if (psubcomm_in == NULL) { /* user does not provide psubcomm */
2982         ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2983         ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2984         if (subsize == 1) {
2985           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2986           psubcomm = c->redundant->psubcomm;
2987         } else {
2988           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2989           psubcomm = c->redundant->psubcomm;
2990         }
2991       } else { /* use psubcomm_in */
2992         subcomm = psubcomm_in->comm;
2993         ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2994       }
2995     }
2996   }
2997 
2998   if (subcomm_in == MPI_COMM_NULL && psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2999      ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3000   } else {
3001     /* via MatGetSubMatrices() */
3002     Mat  *matseq;
3003     IS   isrow,iscol;
3004 
3005     if (reuse == MAT_INITIAL_MATRIX) {
3006       /* create a local sequential matrix matseq[0] */
3007       mloc_sub = PETSC_DECIDE;
3008       ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
3009       ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
3010       rstart = rend - mloc_sub;
3011       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
3012       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
3013     } else { /* reuse == MAT_REUSE_MATRIX */
3014       if (subsize == 1) {
3015         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3016         redund = c->redundant;
3017       } else {
3018         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3019         redund = c->redundant;
3020       }
3021 
3022       isrow  = redund->isrow;
3023       iscol  = redund->iscol;
3024       matseq = redund->matseq;
3025     }
3026     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
3027     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
3028 
3029     if (reuse == MAT_INITIAL_MATRIX) {
3030       /* create a supporting struct and attach it to C for reuse */
3031       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
3032       if (subsize == 1) {
3033         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3034         c->redundant = redund;
3035       } else {
3036         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3037         c->redundant = redund;
3038       }
3039       redund->isrow    = isrow;
3040       redund->iscol    = iscol;
3041       redund->matseq   = matseq;
3042       redund->psubcomm = NULL;
3043       redund->Destroy               = (*matredundant)->ops->destroy;
3044       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
3045     }
3046   }
3047 
3048   if (psubcomm && psubcomm_in==NULL) {
3049     /* if psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
3050     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3051     if (subsize == 1) {
3052       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3053       c->redundant->psubcomm = psubcomm;
3054     } else {
3055       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3056       c->redundant->psubcomm = psubcomm ;
3057     }
3058   }
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3064 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3065 {
3066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3067   PetscErrorCode ierr;
3068   PetscInt       i,*idxb = 0;
3069   PetscScalar    *va,*vb;
3070   Vec            vtmp;
3071 
3072   PetscFunctionBegin;
3073   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3074   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3075   if (idx) {
3076     for (i=0; i<A->rmap->n; i++) {
3077       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3078     }
3079   }
3080 
3081   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3082   if (idx) {
3083     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3084   }
3085   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3086   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3087 
3088   for (i=0; i<A->rmap->n; i++) {
3089     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3090       va[i] = vb[i];
3091       if (idx) idx[i] = a->garray[idxb[i]];
3092     }
3093   }
3094 
3095   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3096   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3097   ierr = PetscFree(idxb);CHKERRQ(ierr);
3098   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3104 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3105 {
3106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3107   PetscErrorCode ierr;
3108   PetscInt       i,*idxb = 0;
3109   PetscScalar    *va,*vb;
3110   Vec            vtmp;
3111 
3112   PetscFunctionBegin;
3113   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3114   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3115   if (idx) {
3116     for (i=0; i<A->cmap->n; i++) {
3117       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3118     }
3119   }
3120 
3121   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3122   if (idx) {
3123     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3124   }
3125   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3126   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3127 
3128   for (i=0; i<A->rmap->n; i++) {
3129     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3130       va[i] = vb[i];
3131       if (idx) idx[i] = a->garray[idxb[i]];
3132     }
3133   }
3134 
3135   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3136   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3137   ierr = PetscFree(idxb);CHKERRQ(ierr);
3138   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 #undef __FUNCT__
3143 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3144 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3145 {
3146   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3147   PetscInt       n      = A->rmap->n;
3148   PetscInt       cstart = A->cmap->rstart;
3149   PetscInt       *cmap  = mat->garray;
3150   PetscInt       *diagIdx, *offdiagIdx;
3151   Vec            diagV, offdiagV;
3152   PetscScalar    *a, *diagA, *offdiagA;
3153   PetscInt       r;
3154   PetscErrorCode ierr;
3155 
3156   PetscFunctionBegin;
3157   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3158   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3159   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3160   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3161   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3162   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3163   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3164   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3165   for (r = 0; r < n; ++r) {
3166     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3167       a[r]   = diagA[r];
3168       idx[r] = cstart + diagIdx[r];
3169     } else {
3170       a[r]   = offdiagA[r];
3171       idx[r] = cmap[offdiagIdx[r]];
3172     }
3173   }
3174   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3175   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3176   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3177   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3178   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3179   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 #undef __FUNCT__
3184 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3185 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3186 {
3187   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3188   PetscInt       n      = A->rmap->n;
3189   PetscInt       cstart = A->cmap->rstart;
3190   PetscInt       *cmap  = mat->garray;
3191   PetscInt       *diagIdx, *offdiagIdx;
3192   Vec            diagV, offdiagV;
3193   PetscScalar    *a, *diagA, *offdiagA;
3194   PetscInt       r;
3195   PetscErrorCode ierr;
3196 
3197   PetscFunctionBegin;
3198   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3199   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3200   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3201   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3202   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3203   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3204   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3205   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3206   for (r = 0; r < n; ++r) {
3207     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3208       a[r]   = diagA[r];
3209       idx[r] = cstart + diagIdx[r];
3210     } else {
3211       a[r]   = offdiagA[r];
3212       idx[r] = cmap[offdiagIdx[r]];
3213     }
3214   }
3215   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3216   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3217   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3218   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3219   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3220   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3226 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3227 {
3228   PetscErrorCode ierr;
3229   Mat            *dummy;
3230 
3231   PetscFunctionBegin;
3232   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3233   *newmat = *dummy;
3234   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3242 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3243 {
3244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3245   PetscErrorCode ierr;
3246 
3247   PetscFunctionBegin;
3248   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3254 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3255 {
3256   PetscErrorCode ierr;
3257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3258 
3259   PetscFunctionBegin;
3260   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3261   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3262   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 /* -------------------------------------------------------------------*/
3268 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3269                                        MatGetRow_MPIAIJ,
3270                                        MatRestoreRow_MPIAIJ,
3271                                        MatMult_MPIAIJ,
3272                                 /* 4*/ MatMultAdd_MPIAIJ,
3273                                        MatMultTranspose_MPIAIJ,
3274                                        MatMultTransposeAdd_MPIAIJ,
3275 #if defined(PETSC_HAVE_PBGL)
3276                                        MatSolve_MPIAIJ,
3277 #else
3278                                        0,
3279 #endif
3280                                        0,
3281                                        0,
3282                                 /*10*/ 0,
3283                                        0,
3284                                        0,
3285                                        MatSOR_MPIAIJ,
3286                                        MatTranspose_MPIAIJ,
3287                                 /*15*/ MatGetInfo_MPIAIJ,
3288                                        MatEqual_MPIAIJ,
3289                                        MatGetDiagonal_MPIAIJ,
3290                                        MatDiagonalScale_MPIAIJ,
3291                                        MatNorm_MPIAIJ,
3292                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3293                                        MatAssemblyEnd_MPIAIJ,
3294                                        MatSetOption_MPIAIJ,
3295                                        MatZeroEntries_MPIAIJ,
3296                                 /*24*/ MatZeroRows_MPIAIJ,
3297                                        0,
3298 #if defined(PETSC_HAVE_PBGL)
3299                                        0,
3300 #else
3301                                        0,
3302 #endif
3303                                        0,
3304                                        0,
3305                                 /*29*/ MatSetUp_MPIAIJ,
3306 #if defined(PETSC_HAVE_PBGL)
3307                                        0,
3308 #else
3309                                        0,
3310 #endif
3311                                        0,
3312                                        0,
3313                                        0,
3314                                 /*34*/ MatDuplicate_MPIAIJ,
3315                                        0,
3316                                        0,
3317                                        0,
3318                                        0,
3319                                 /*39*/ MatAXPY_MPIAIJ,
3320                                        MatGetSubMatrices_MPIAIJ,
3321                                        MatIncreaseOverlap_MPIAIJ,
3322                                        MatGetValues_MPIAIJ,
3323                                        MatCopy_MPIAIJ,
3324                                 /*44*/ MatGetRowMax_MPIAIJ,
3325                                        MatScale_MPIAIJ,
3326                                        0,
3327                                        0,
3328                                        MatZeroRowsColumns_MPIAIJ,
3329                                 /*49*/ MatSetRandom_MPIAIJ,
3330                                        0,
3331                                        0,
3332                                        0,
3333                                        0,
3334                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3335                                        0,
3336                                        MatSetUnfactored_MPIAIJ,
3337                                        MatPermute_MPIAIJ,
3338                                        0,
3339                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3340                                        MatDestroy_MPIAIJ,
3341                                        MatView_MPIAIJ,
3342                                        0,
3343                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3344                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3345                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3346                                        0,
3347                                        0,
3348                                        0,
3349                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3350                                        MatGetRowMinAbs_MPIAIJ,
3351                                        0,
3352                                        MatSetColoring_MPIAIJ,
3353                                        0,
3354                                        MatSetValuesAdifor_MPIAIJ,
3355                                 /*75*/ MatFDColoringApply_AIJ,
3356                                        0,
3357                                        0,
3358                                        0,
3359                                        MatFindZeroDiagonals_MPIAIJ,
3360                                 /*80*/ 0,
3361                                        0,
3362                                        0,
3363                                 /*83*/ MatLoad_MPIAIJ,
3364                                        0,
3365                                        0,
3366                                        0,
3367                                        0,
3368                                        0,
3369                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3370                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3371                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3372                                        MatPtAP_MPIAIJ_MPIAIJ,
3373                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3374                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3375                                        0,
3376                                        0,
3377                                        0,
3378                                        0,
3379                                 /*99*/ 0,
3380                                        0,
3381                                        0,
3382                                        MatConjugate_MPIAIJ,
3383                                        0,
3384                                 /*104*/MatSetValuesRow_MPIAIJ,
3385                                        MatRealPart_MPIAIJ,
3386                                        MatImaginaryPart_MPIAIJ,
3387                                        0,
3388                                        0,
3389                                 /*109*/0,
3390                                        MatGetRedundantMatrix_MPIAIJ,
3391                                        MatGetRowMin_MPIAIJ,
3392                                        0,
3393                                        0,
3394                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3395                                        0,
3396                                        0,
3397                                        0,
3398                                        0,
3399                                 /*119*/0,
3400                                        0,
3401                                        0,
3402                                        0,
3403                                        MatGetMultiProcBlock_MPIAIJ,
3404                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3405                                        MatGetColumnNorms_MPIAIJ,
3406                                        MatInvertBlockDiagonal_MPIAIJ,
3407                                        0,
3408                                        MatGetSubMatricesParallel_MPIAIJ,
3409                                 /*129*/0,
3410                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3411                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3412                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3413                                        0,
3414                                 /*134*/0,
3415                                        0,
3416                                        0,
3417                                        0,
3418                                        0,
3419                                 /*139*/0,
3420                                        0
3421 };
3422 
3423 /* ----------------------------------------------------------------------------------------*/
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3427 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3428 {
3429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3430   PetscErrorCode ierr;
3431 
3432   PetscFunctionBegin;
3433   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3434   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3440 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3441 {
3442   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3443   PetscErrorCode ierr;
3444 
3445   PetscFunctionBegin;
3446   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3447   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3453 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3454 {
3455   Mat_MPIAIJ     *b;
3456   PetscErrorCode ierr;
3457 
3458   PetscFunctionBegin;
3459   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3460   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3461   b = (Mat_MPIAIJ*)B->data;
3462 
3463   if (!B->preallocated) {
3464     /* Explicitly create 2 MATSEQAIJ matrices. */
3465     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3466     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3467     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3468     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3469     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3470     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3471     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3472     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3473     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3474     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3475   }
3476 
3477   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3478   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3479   B->preallocated = PETSC_TRUE;
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3485 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3486 {
3487   Mat            mat;
3488   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3489   PetscErrorCode ierr;
3490 
3491   PetscFunctionBegin;
3492   *newmat = 0;
3493   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3494   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3495   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3496   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3497   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3498   a       = (Mat_MPIAIJ*)mat->data;
3499 
3500   mat->factortype   = matin->factortype;
3501   mat->rmap->bs     = matin->rmap->bs;
3502   mat->cmap->bs     = matin->cmap->bs;
3503   mat->assembled    = PETSC_TRUE;
3504   mat->insertmode   = NOT_SET_VALUES;
3505   mat->preallocated = PETSC_TRUE;
3506 
3507   a->size         = oldmat->size;
3508   a->rank         = oldmat->rank;
3509   a->donotstash   = oldmat->donotstash;
3510   a->roworiented  = oldmat->roworiented;
3511   a->rowindices   = 0;
3512   a->rowvalues    = 0;
3513   a->getrowactive = PETSC_FALSE;
3514 
3515   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3516   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3517 
3518   if (oldmat->colmap) {
3519 #if defined(PETSC_USE_CTABLE)
3520     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3521 #else
3522     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3523     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3524     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3525 #endif
3526   } else a->colmap = 0;
3527   if (oldmat->garray) {
3528     PetscInt len;
3529     len  = oldmat->B->cmap->n;
3530     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3531     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3532     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3533   } else a->garray = 0;
3534 
3535   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3536   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3537   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3538   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3539   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3540   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3541   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3542   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3543   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3544   *newmat = mat;
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 
3549 
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatLoad_MPIAIJ"
3552 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3553 {
3554   PetscScalar    *vals,*svals;
3555   MPI_Comm       comm;
3556   PetscErrorCode ierr;
3557   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3558   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3559   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3560   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3561   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3562   int            fd;
3563   PetscInt       bs = 1;
3564 
3565   PetscFunctionBegin;
3566   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3567   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3569   if (!rank) {
3570     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3571     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3572     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3573   }
3574 
3575   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3576   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3577   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3578 
3579   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3580 
3581   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3582   M    = header[1]; N = header[2];
3583   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3584   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3585   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3586 
3587   /* If global sizes are set, check if they are consistent with that given in the file */
3588   if (sizesset) {
3589     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3590   }
3591   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3592   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3593 
3594   /* determine ownership of all (block) rows */
3595   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3596   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3597   else m = newMat->rmap->n; /* Set by user */
3598 
3599   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3600   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3601 
3602   /* First process needs enough room for process with most rows */
3603   if (!rank) {
3604     mmax = rowners[1];
3605     for (i=2; i<=size; i++) {
3606       mmax = PetscMax(mmax, rowners[i]);
3607     }
3608   } else mmax = -1;             /* unused, but compilers complain */
3609 
3610   rowners[0] = 0;
3611   for (i=2; i<=size; i++) {
3612     rowners[i] += rowners[i-1];
3613   }
3614   rstart = rowners[rank];
3615   rend   = rowners[rank+1];
3616 
3617   /* distribute row lengths to all processors */
3618   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3619   if (!rank) {
3620     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3621     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3622     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3623     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3624     for (j=0; j<m; j++) {
3625       procsnz[0] += ourlens[j];
3626     }
3627     for (i=1; i<size; i++) {
3628       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3629       /* calculate the number of nonzeros on each processor */
3630       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3631         procsnz[i] += rowlengths[j];
3632       }
3633       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3634     }
3635     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3636   } else {
3637     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3638   }
3639 
3640   if (!rank) {
3641     /* determine max buffer needed and allocate it */
3642     maxnz = 0;
3643     for (i=0; i<size; i++) {
3644       maxnz = PetscMax(maxnz,procsnz[i]);
3645     }
3646     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3647 
3648     /* read in my part of the matrix column indices  */
3649     nz   = procsnz[0];
3650     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3651     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3652 
3653     /* read in every one elses and ship off */
3654     for (i=1; i<size; i++) {
3655       nz   = procsnz[i];
3656       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3657       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3658     }
3659     ierr = PetscFree(cols);CHKERRQ(ierr);
3660   } else {
3661     /* determine buffer space needed for message */
3662     nz = 0;
3663     for (i=0; i<m; i++) {
3664       nz += ourlens[i];
3665     }
3666     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3667 
3668     /* receive message of column indices*/
3669     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3670   }
3671 
3672   /* determine column ownership if matrix is not square */
3673   if (N != M) {
3674     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3675     else n = newMat->cmap->n;
3676     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3677     cstart = cend - n;
3678   } else {
3679     cstart = rstart;
3680     cend   = rend;
3681     n      = cend - cstart;
3682   }
3683 
3684   /* loop over local rows, determining number of off diagonal entries */
3685   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3686   jj   = 0;
3687   for (i=0; i<m; i++) {
3688     for (j=0; j<ourlens[i]; j++) {
3689       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3690       jj++;
3691     }
3692   }
3693 
3694   for (i=0; i<m; i++) {
3695     ourlens[i] -= offlens[i];
3696   }
3697   if (!sizesset) {
3698     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3699   }
3700 
3701   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3702 
3703   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3704 
3705   for (i=0; i<m; i++) {
3706     ourlens[i] += offlens[i];
3707   }
3708 
3709   if (!rank) {
3710     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3711 
3712     /* read in my part of the matrix numerical values  */
3713     nz   = procsnz[0];
3714     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3715 
3716     /* insert into matrix */
3717     jj      = rstart;
3718     smycols = mycols;
3719     svals   = vals;
3720     for (i=0; i<m; i++) {
3721       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3722       smycols += ourlens[i];
3723       svals   += ourlens[i];
3724       jj++;
3725     }
3726 
3727     /* read in other processors and ship out */
3728     for (i=1; i<size; i++) {
3729       nz   = procsnz[i];
3730       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3731       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3732     }
3733     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3734   } else {
3735     /* receive numeric values */
3736     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3737 
3738     /* receive message of values*/
3739     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3740 
3741     /* insert into matrix */
3742     jj      = rstart;
3743     smycols = mycols;
3744     svals   = vals;
3745     for (i=0; i<m; i++) {
3746       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3747       smycols += ourlens[i];
3748       svals   += ourlens[i];
3749       jj++;
3750     }
3751   }
3752   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3753   ierr = PetscFree(vals);CHKERRQ(ierr);
3754   ierr = PetscFree(mycols);CHKERRQ(ierr);
3755   ierr = PetscFree(rowners);CHKERRQ(ierr);
3756   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3763 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3764 {
3765   PetscErrorCode ierr;
3766   IS             iscol_local;
3767   PetscInt       csize;
3768 
3769   PetscFunctionBegin;
3770   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3771   if (call == MAT_REUSE_MATRIX) {
3772     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3773     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3774   } else {
3775     PetscInt cbs;
3776     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3777     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3778     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3779   }
3780   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3781   if (call == MAT_INITIAL_MATRIX) {
3782     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3783     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3784   }
3785   PetscFunctionReturn(0);
3786 }
3787 
3788 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3789 #undef __FUNCT__
3790 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3791 /*
3792     Not great since it makes two copies of the submatrix, first an SeqAIJ
3793   in local and then by concatenating the local matrices the end result.
3794   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3795 
3796   Note: This requires a sequential iscol with all indices.
3797 */
3798 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3799 {
3800   PetscErrorCode ierr;
3801   PetscMPIInt    rank,size;
3802   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3803   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3804   PetscBool      allcolumns, colflag;
3805   Mat            M,Mreuse;
3806   MatScalar      *vwork,*aa;
3807   MPI_Comm       comm;
3808   Mat_SeqAIJ     *aij;
3809 
3810   PetscFunctionBegin;
3811   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3812   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3813   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3814 
3815   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3816   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3817   if (colflag && ncol == mat->cmap->N) {
3818     allcolumns = PETSC_TRUE;
3819   } else {
3820     allcolumns = PETSC_FALSE;
3821   }
3822   if (call ==  MAT_REUSE_MATRIX) {
3823     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3824     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3825     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3826   } else {
3827     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3828   }
3829 
3830   /*
3831       m - number of local rows
3832       n - number of columns (same on all processors)
3833       rstart - first row in new global matrix generated
3834   */
3835   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3836   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3837   if (call == MAT_INITIAL_MATRIX) {
3838     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3839     ii  = aij->i;
3840     jj  = aij->j;
3841 
3842     /*
3843         Determine the number of non-zeros in the diagonal and off-diagonal
3844         portions of the matrix in order to do correct preallocation
3845     */
3846 
3847     /* first get start and end of "diagonal" columns */
3848     if (csize == PETSC_DECIDE) {
3849       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3850       if (mglobal == n) { /* square matrix */
3851         nlocal = m;
3852       } else {
3853         nlocal = n/size + ((n % size) > rank);
3854       }
3855     } else {
3856       nlocal = csize;
3857     }
3858     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3859     rstart = rend - nlocal;
3860     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3861 
3862     /* next, compute all the lengths */
3863     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3864     olens = dlens + m;
3865     for (i=0; i<m; i++) {
3866       jend = ii[i+1] - ii[i];
3867       olen = 0;
3868       dlen = 0;
3869       for (j=0; j<jend; j++) {
3870         if (*jj < rstart || *jj >= rend) olen++;
3871         else dlen++;
3872         jj++;
3873       }
3874       olens[i] = olen;
3875       dlens[i] = dlen;
3876     }
3877     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3878     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3879     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3880     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3881     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3882     ierr = PetscFree(dlens);CHKERRQ(ierr);
3883   } else {
3884     PetscInt ml,nl;
3885 
3886     M    = *newmat;
3887     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3888     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3889     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3890     /*
3891          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3892        rather than the slower MatSetValues().
3893     */
3894     M->was_assembled = PETSC_TRUE;
3895     M->assembled     = PETSC_FALSE;
3896   }
3897   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3898   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3899   ii   = aij->i;
3900   jj   = aij->j;
3901   aa   = aij->a;
3902   for (i=0; i<m; i++) {
3903     row   = rstart + i;
3904     nz    = ii[i+1] - ii[i];
3905     cwork = jj;     jj += nz;
3906     vwork = aa;     aa += nz;
3907     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3908   }
3909 
3910   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3911   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   *newmat = M;
3913 
3914   /* save submatrix used in processor for next request */
3915   if (call ==  MAT_INITIAL_MATRIX) {
3916     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3917     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3918   }
3919   PetscFunctionReturn(0);
3920 }
3921 
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3924 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3925 {
3926   PetscInt       m,cstart, cend,j,nnz,i,d;
3927   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3928   const PetscInt *JJ;
3929   PetscScalar    *values;
3930   PetscErrorCode ierr;
3931 
3932   PetscFunctionBegin;
3933   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3934 
3935   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3936   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3937   m      = B->rmap->n;
3938   cstart = B->cmap->rstart;
3939   cend   = B->cmap->rend;
3940   rstart = B->rmap->rstart;
3941 
3942   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3943 
3944 #if defined(PETSC_USE_DEBUGGING)
3945   for (i=0; i<m; i++) {
3946     nnz = Ii[i+1]- Ii[i];
3947     JJ  = J + Ii[i];
3948     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3949     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3950     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3951   }
3952 #endif
3953 
3954   for (i=0; i<m; i++) {
3955     nnz     = Ii[i+1]- Ii[i];
3956     JJ      = J + Ii[i];
3957     nnz_max = PetscMax(nnz_max,nnz);
3958     d       = 0;
3959     for (j=0; j<nnz; j++) {
3960       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3961     }
3962     d_nnz[i] = d;
3963     o_nnz[i] = nnz - d;
3964   }
3965   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3966   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3967 
3968   if (v) values = (PetscScalar*)v;
3969   else {
3970     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3971     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3972   }
3973 
3974   for (i=0; i<m; i++) {
3975     ii   = i + rstart;
3976     nnz  = Ii[i+1]- Ii[i];
3977     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3978   }
3979   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3981 
3982   if (!v) {
3983     ierr = PetscFree(values);CHKERRQ(ierr);
3984   }
3985   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3986   PetscFunctionReturn(0);
3987 }
3988 
3989 #undef __FUNCT__
3990 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3991 /*@
3992    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3993    (the default parallel PETSc format).
3994 
3995    Collective on MPI_Comm
3996 
3997    Input Parameters:
3998 +  B - the matrix
3999 .  i - the indices into j for the start of each local row (starts with zero)
4000 .  j - the column indices for each local row (starts with zero)
4001 -  v - optional values in the matrix
4002 
4003    Level: developer
4004 
4005    Notes:
4006        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4007      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4008      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4009 
4010        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4011 
4012        The format which is used for the sparse matrix input, is equivalent to a
4013     row-major ordering.. i.e for the following matrix, the input data expected is
4014     as shown:
4015 
4016         1 0 0
4017         2 0 3     P0
4018        -------
4019         4 5 6     P1
4020 
4021      Process0 [P0]: rows_owned=[0,1]
4022         i =  {0,1,3}  [size = nrow+1  = 2+1]
4023         j =  {0,0,2}  [size = nz = 6]
4024         v =  {1,2,3}  [size = nz = 6]
4025 
4026      Process1 [P1]: rows_owned=[2]
4027         i =  {0,3}    [size = nrow+1  = 1+1]
4028         j =  {0,1,2}  [size = nz = 6]
4029         v =  {4,5,6}  [size = nz = 6]
4030 
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4034           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4035 @*/
4036 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 #undef __FUNCT__
4046 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4047 /*@C
4048    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4049    (the default parallel PETSc format).  For good matrix assembly performance
4050    the user should preallocate the matrix storage by setting the parameters
4051    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4052    performance can be increased by more than a factor of 50.
4053 
4054    Collective on MPI_Comm
4055 
4056    Input Parameters:
4057 +  A - the matrix
4058 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4059            (same value is used for all local rows)
4060 .  d_nnz - array containing the number of nonzeros in the various rows of the
4061            DIAGONAL portion of the local submatrix (possibly different for each row)
4062            or NULL, if d_nz is used to specify the nonzero structure.
4063            The size of this array is equal to the number of local rows, i.e 'm'.
4064            For matrices that will be factored, you must leave room for (and set)
4065            the diagonal entry even if it is zero.
4066 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4067            submatrix (same value is used for all local rows).
4068 -  o_nnz - array containing the number of nonzeros in the various rows of the
4069            OFF-DIAGONAL portion of the local submatrix (possibly different for
4070            each row) or NULL, if o_nz is used to specify the nonzero
4071            structure. The size of this array is equal to the number
4072            of local rows, i.e 'm'.
4073 
4074    If the *_nnz parameter is given then the *_nz parameter is ignored
4075 
4076    The AIJ format (also called the Yale sparse matrix format or
4077    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4078    storage.  The stored row and column indices begin with zero.
4079    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4080 
4081    The parallel matrix is partitioned such that the first m0 rows belong to
4082    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4083    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4084 
4085    The DIAGONAL portion of the local submatrix of a processor can be defined
4086    as the submatrix which is obtained by extraction the part corresponding to
4087    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4088    first row that belongs to the processor, r2 is the last row belonging to
4089    the this processor, and c1-c2 is range of indices of the local part of a
4090    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4091    common case of a square matrix, the row and column ranges are the same and
4092    the DIAGONAL part is also square. The remaining portion of the local
4093    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4094 
4095    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4096 
4097    You can call MatGetInfo() to get information on how effective the preallocation was;
4098    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4099    You can also run with the option -info and look for messages with the string
4100    malloc in them to see if additional memory allocation was needed.
4101 
4102    Example usage:
4103 
4104    Consider the following 8x8 matrix with 34 non-zero values, that is
4105    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4106    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4107    as follows:
4108 
4109 .vb
4110             1  2  0  |  0  3  0  |  0  4
4111     Proc0   0  5  6  |  7  0  0  |  8  0
4112             9  0 10  | 11  0  0  | 12  0
4113     -------------------------------------
4114            13  0 14  | 15 16 17  |  0  0
4115     Proc1   0 18  0  | 19 20 21  |  0  0
4116             0  0  0  | 22 23  0  | 24  0
4117     -------------------------------------
4118     Proc2  25 26 27  |  0  0 28  | 29  0
4119            30  0  0  | 31 32 33  |  0 34
4120 .ve
4121 
4122    This can be represented as a collection of submatrices as:
4123 
4124 .vb
4125       A B C
4126       D E F
4127       G H I
4128 .ve
4129 
4130    Where the submatrices A,B,C are owned by proc0, D,E,F are
4131    owned by proc1, G,H,I are owned by proc2.
4132 
4133    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4134    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4135    The 'M','N' parameters are 8,8, and have the same values on all procs.
4136 
4137    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4138    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4139    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4140    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4141    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4142    matrix, ans [DF] as another SeqAIJ matrix.
4143 
4144    When d_nz, o_nz parameters are specified, d_nz storage elements are
4145    allocated for every row of the local diagonal submatrix, and o_nz
4146    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4147    One way to choose d_nz and o_nz is to use the max nonzerors per local
4148    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4149    In this case, the values of d_nz,o_nz are:
4150 .vb
4151      proc0 : dnz = 2, o_nz = 2
4152      proc1 : dnz = 3, o_nz = 2
4153      proc2 : dnz = 1, o_nz = 4
4154 .ve
4155    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4156    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4157    for proc3. i.e we are using 12+15+10=37 storage locations to store
4158    34 values.
4159 
4160    When d_nnz, o_nnz parameters are specified, the storage is specified
4161    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4162    In the above case the values for d_nnz,o_nnz are:
4163 .vb
4164      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4165      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4166      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4167 .ve
4168    Here the space allocated is sum of all the above values i.e 34, and
4169    hence pre-allocation is perfect.
4170 
4171    Level: intermediate
4172 
4173 .keywords: matrix, aij, compressed row, sparse, parallel
4174 
4175 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4176           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4177 @*/
4178 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4179 {
4180   PetscErrorCode ierr;
4181 
4182   PetscFunctionBegin;
4183   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4184   PetscValidType(B,1);
4185   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4186   PetscFunctionReturn(0);
4187 }
4188 
4189 #undef __FUNCT__
4190 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4191 /*@
4192      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4193          CSR format the local rows.
4194 
4195    Collective on MPI_Comm
4196 
4197    Input Parameters:
4198 +  comm - MPI communicator
4199 .  m - number of local rows (Cannot be PETSC_DECIDE)
4200 .  n - This value should be the same as the local size used in creating the
4201        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4202        calculated if N is given) For square matrices n is almost always m.
4203 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4204 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4205 .   i - row indices
4206 .   j - column indices
4207 -   a - matrix values
4208 
4209    Output Parameter:
4210 .   mat - the matrix
4211 
4212    Level: intermediate
4213 
4214    Notes:
4215        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4216      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4217      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4218 
4219        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4220 
4221        The format which is used for the sparse matrix input, is equivalent to a
4222     row-major ordering.. i.e for the following matrix, the input data expected is
4223     as shown:
4224 
4225         1 0 0
4226         2 0 3     P0
4227        -------
4228         4 5 6     P1
4229 
4230      Process0 [P0]: rows_owned=[0,1]
4231         i =  {0,1,3}  [size = nrow+1  = 2+1]
4232         j =  {0,0,2}  [size = nz = 6]
4233         v =  {1,2,3}  [size = nz = 6]
4234 
4235      Process1 [P1]: rows_owned=[2]
4236         i =  {0,3}    [size = nrow+1  = 1+1]
4237         j =  {0,1,2}  [size = nz = 6]
4238         v =  {4,5,6}  [size = nz = 6]
4239 
4240 .keywords: matrix, aij, compressed row, sparse, parallel
4241 
4242 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4243           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4244 @*/
4245 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4246 {
4247   PetscErrorCode ierr;
4248 
4249   PetscFunctionBegin;
4250   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4251   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4252   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4253   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4254   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4255   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4256   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4257   PetscFunctionReturn(0);
4258 }
4259 
4260 #undef __FUNCT__
4261 #define __FUNCT__ "MatCreateAIJ"
4262 /*@C
4263    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4264    (the default parallel PETSc format).  For good matrix assembly performance
4265    the user should preallocate the matrix storage by setting the parameters
4266    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4267    performance can be increased by more than a factor of 50.
4268 
4269    Collective on MPI_Comm
4270 
4271    Input Parameters:
4272 +  comm - MPI communicator
4273 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4274            This value should be the same as the local size used in creating the
4275            y vector for the matrix-vector product y = Ax.
4276 .  n - This value should be the same as the local size used in creating the
4277        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4278        calculated if N is given) For square matrices n is almost always m.
4279 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4280 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4281 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4282            (same value is used for all local rows)
4283 .  d_nnz - array containing the number of nonzeros in the various rows of the
4284            DIAGONAL portion of the local submatrix (possibly different for each row)
4285            or NULL, if d_nz is used to specify the nonzero structure.
4286            The size of this array is equal to the number of local rows, i.e 'm'.
4287 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4288            submatrix (same value is used for all local rows).
4289 -  o_nnz - array containing the number of nonzeros in the various rows of the
4290            OFF-DIAGONAL portion of the local submatrix (possibly different for
4291            each row) or NULL, if o_nz is used to specify the nonzero
4292            structure. The size of this array is equal to the number
4293            of local rows, i.e 'm'.
4294 
4295    Output Parameter:
4296 .  A - the matrix
4297 
4298    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4299    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4300    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4301 
4302    Notes:
4303    If the *_nnz parameter is given then the *_nz parameter is ignored
4304 
4305    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4306    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4307    storage requirements for this matrix.
4308 
4309    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4310    processor than it must be used on all processors that share the object for
4311    that argument.
4312 
4313    The user MUST specify either the local or global matrix dimensions
4314    (possibly both).
4315 
4316    The parallel matrix is partitioned across processors such that the
4317    first m0 rows belong to process 0, the next m1 rows belong to
4318    process 1, the next m2 rows belong to process 2 etc.. where
4319    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4320    values corresponding to [m x N] submatrix.
4321 
4322    The columns are logically partitioned with the n0 columns belonging
4323    to 0th partition, the next n1 columns belonging to the next
4324    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4325 
4326    The DIAGONAL portion of the local submatrix on any given processor
4327    is the submatrix corresponding to the rows and columns m,n
4328    corresponding to the given processor. i.e diagonal matrix on
4329    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4330    etc. The remaining portion of the local submatrix [m x (N-n)]
4331    constitute the OFF-DIAGONAL portion. The example below better
4332    illustrates this concept.
4333 
4334    For a square global matrix we define each processor's diagonal portion
4335    to be its local rows and the corresponding columns (a square submatrix);
4336    each processor's off-diagonal portion encompasses the remainder of the
4337    local matrix (a rectangular submatrix).
4338 
4339    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4340 
4341    When calling this routine with a single process communicator, a matrix of
4342    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4343    type of communicator, use the construction mechanism:
4344      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4345 
4346    By default, this format uses inodes (identical nodes) when possible.
4347    We search for consecutive rows with the same nonzero structure, thereby
4348    reusing matrix information to achieve increased efficiency.
4349 
4350    Options Database Keys:
4351 +  -mat_no_inode  - Do not use inodes
4352 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4353 -  -mat_aij_oneindex - Internally use indexing starting at 1
4354         rather than 0.  Note that when calling MatSetValues(),
4355         the user still MUST index entries starting at 0!
4356 
4357 
4358    Example usage:
4359 
4360    Consider the following 8x8 matrix with 34 non-zero values, that is
4361    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4362    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4363    as follows:
4364 
4365 .vb
4366             1  2  0  |  0  3  0  |  0  4
4367     Proc0   0  5  6  |  7  0  0  |  8  0
4368             9  0 10  | 11  0  0  | 12  0
4369     -------------------------------------
4370            13  0 14  | 15 16 17  |  0  0
4371     Proc1   0 18  0  | 19 20 21  |  0  0
4372             0  0  0  | 22 23  0  | 24  0
4373     -------------------------------------
4374     Proc2  25 26 27  |  0  0 28  | 29  0
4375            30  0  0  | 31 32 33  |  0 34
4376 .ve
4377 
4378    This can be represented as a collection of submatrices as:
4379 
4380 .vb
4381       A B C
4382       D E F
4383       G H I
4384 .ve
4385 
4386    Where the submatrices A,B,C are owned by proc0, D,E,F are
4387    owned by proc1, G,H,I are owned by proc2.
4388 
4389    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4390    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4391    The 'M','N' parameters are 8,8, and have the same values on all procs.
4392 
4393    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4394    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4395    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4396    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4397    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4398    matrix, ans [DF] as another SeqAIJ matrix.
4399 
4400    When d_nz, o_nz parameters are specified, d_nz storage elements are
4401    allocated for every row of the local diagonal submatrix, and o_nz
4402    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4403    One way to choose d_nz and o_nz is to use the max nonzerors per local
4404    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4405    In this case, the values of d_nz,o_nz are:
4406 .vb
4407      proc0 : dnz = 2, o_nz = 2
4408      proc1 : dnz = 3, o_nz = 2
4409      proc2 : dnz = 1, o_nz = 4
4410 .ve
4411    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4412    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4413    for proc3. i.e we are using 12+15+10=37 storage locations to store
4414    34 values.
4415 
4416    When d_nnz, o_nnz parameters are specified, the storage is specified
4417    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4418    In the above case the values for d_nnz,o_nnz are:
4419 .vb
4420      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4421      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4422      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4423 .ve
4424    Here the space allocated is sum of all the above values i.e 34, and
4425    hence pre-allocation is perfect.
4426 
4427    Level: intermediate
4428 
4429 .keywords: matrix, aij, compressed row, sparse, parallel
4430 
4431 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4432           MPIAIJ, MatCreateMPIAIJWithArrays()
4433 @*/
4434 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4435 {
4436   PetscErrorCode ierr;
4437   PetscMPIInt    size;
4438 
4439   PetscFunctionBegin;
4440   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4441   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4442   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4443   if (size > 1) {
4444     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4445     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4446   } else {
4447     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4448     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4449   }
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4455 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4456 {
4457   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4458 
4459   PetscFunctionBegin;
4460   *Ad     = a->A;
4461   *Ao     = a->B;
4462   *colmap = a->garray;
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 #undef __FUNCT__
4467 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4468 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4469 {
4470   PetscErrorCode ierr;
4471   PetscInt       i;
4472   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4473 
4474   PetscFunctionBegin;
4475   if (coloring->ctype == IS_COLORING_GLOBAL) {
4476     ISColoringValue *allcolors,*colors;
4477     ISColoring      ocoloring;
4478 
4479     /* set coloring for diagonal portion */
4480     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4481 
4482     /* set coloring for off-diagonal portion */
4483     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4484     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4485     for (i=0; i<a->B->cmap->n; i++) {
4486       colors[i] = allcolors[a->garray[i]];
4487     }
4488     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4489     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4490     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4491     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4492   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4493     ISColoringValue *colors;
4494     PetscInt        *larray;
4495     ISColoring      ocoloring;
4496 
4497     /* set coloring for diagonal portion */
4498     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4499     for (i=0; i<a->A->cmap->n; i++) {
4500       larray[i] = i + A->cmap->rstart;
4501     }
4502     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4503     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4504     for (i=0; i<a->A->cmap->n; i++) {
4505       colors[i] = coloring->colors[larray[i]];
4506     }
4507     ierr = PetscFree(larray);CHKERRQ(ierr);
4508     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4509     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4510     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4511 
4512     /* set coloring for off-diagonal portion */
4513     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4514     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4515     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4516     for (i=0; i<a->B->cmap->n; i++) {
4517       colors[i] = coloring->colors[larray[i]];
4518     }
4519     ierr = PetscFree(larray);CHKERRQ(ierr);
4520     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4521     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4522     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4523   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4529 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4530 {
4531   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4532   PetscErrorCode ierr;
4533 
4534   PetscFunctionBegin;
4535   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4536   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4537   PetscFunctionReturn(0);
4538 }
4539 
4540 #undef __FUNCT__
4541 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4542 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4543 {
4544   PetscErrorCode ierr;
4545   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4546   PetscInt       *indx;
4547 
4548   PetscFunctionBegin;
4549   /* This routine will ONLY return MPIAIJ type matrix */
4550   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4551   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4552   if (n == PETSC_DECIDE) {
4553     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4554   }
4555   /* Check sum(n) = N */
4556   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4557   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4558 
4559   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4560   rstart -= m;
4561 
4562   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4563   for (i=0; i<m; i++) {
4564     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4565     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4566     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4567   }
4568 
4569   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4570   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4571   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4572   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4573   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4574   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 #undef __FUNCT__
4579 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4580 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4581 {
4582   PetscErrorCode ierr;
4583   PetscInt       m,N,i,rstart,nnz,Ii;
4584   PetscInt       *indx;
4585   PetscScalar    *values;
4586 
4587   PetscFunctionBegin;
4588   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4589   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4590   for (i=0; i<m; i++) {
4591     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4592     Ii   = i + rstart;
4593     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4594     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4595   }
4596   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
4601 #undef __FUNCT__
4602 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4603 /*@
4604       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4605                  matrices from each processor
4606 
4607     Collective on MPI_Comm
4608 
4609    Input Parameters:
4610 +    comm - the communicators the parallel matrix will live on
4611 .    inmat - the input sequential matrices
4612 .    n - number of local columns (or PETSC_DECIDE)
4613 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4614 
4615    Output Parameter:
4616 .    outmat - the parallel matrix generated
4617 
4618     Level: advanced
4619 
4620    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4621 
4622 @*/
4623 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4624 {
4625   PetscErrorCode ierr;
4626   PetscMPIInt    size;
4627 
4628   PetscFunctionBegin;
4629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4630   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4631   if (size == 1) {
4632     if (scall == MAT_INITIAL_MATRIX) {
4633       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4634     } else {
4635       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4636     }
4637   } else {
4638     if (scall == MAT_INITIAL_MATRIX) {
4639       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4640     }
4641     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4642   }
4643   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4644   PetscFunctionReturn(0);
4645 }
4646 
4647 #undef __FUNCT__
4648 #define __FUNCT__ "MatFileSplit"
4649 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4650 {
4651   PetscErrorCode    ierr;
4652   PetscMPIInt       rank;
4653   PetscInt          m,N,i,rstart,nnz;
4654   size_t            len;
4655   const PetscInt    *indx;
4656   PetscViewer       out;
4657   char              *name;
4658   Mat               B;
4659   const PetscScalar *values;
4660 
4661   PetscFunctionBegin;
4662   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4663   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4664   /* Should this be the type of the diagonal block of A? */
4665   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4666   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4667   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4668   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4669   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4670   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4671   for (i=0; i<m; i++) {
4672     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4673     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4674     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4675   }
4676   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4677   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4678 
4679   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4680   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4681   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4682   sprintf(name,"%s.%d",outfile,rank);
4683   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4684   ierr = PetscFree(name);CHKERRQ(ierr);
4685   ierr = MatView(B,out);CHKERRQ(ierr);
4686   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4687   ierr = MatDestroy(&B);CHKERRQ(ierr);
4688   PetscFunctionReturn(0);
4689 }
4690 
4691 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4692 #undef __FUNCT__
4693 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4694 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4695 {
4696   PetscErrorCode      ierr;
4697   Mat_Merge_SeqsToMPI *merge;
4698   PetscContainer      container;
4699 
4700   PetscFunctionBegin;
4701   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4702   if (container) {
4703     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4704     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4705     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4706     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4707     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4708     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4709     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4710     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4711     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4712     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4713     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4714     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4715     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4716     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4717     ierr = PetscFree(merge);CHKERRQ(ierr);
4718     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4719   }
4720   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4721   PetscFunctionReturn(0);
4722 }
4723 
4724 #include <../src/mat/utils/freespace.h>
4725 #include <petscbt.h>
4726 
4727 #undef __FUNCT__
4728 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4729 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4730 {
4731   PetscErrorCode      ierr;
4732   MPI_Comm            comm;
4733   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4734   PetscMPIInt         size,rank,taga,*len_s;
4735   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4736   PetscInt            proc,m;
4737   PetscInt            **buf_ri,**buf_rj;
4738   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4739   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4740   MPI_Request         *s_waits,*r_waits;
4741   MPI_Status          *status;
4742   MatScalar           *aa=a->a;
4743   MatScalar           **abuf_r,*ba_i;
4744   Mat_Merge_SeqsToMPI *merge;
4745   PetscContainer      container;
4746 
4747   PetscFunctionBegin;
4748   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4749   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4750 
4751   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4752   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4753 
4754   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4755   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4756 
4757   bi     = merge->bi;
4758   bj     = merge->bj;
4759   buf_ri = merge->buf_ri;
4760   buf_rj = merge->buf_rj;
4761 
4762   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4763   owners = merge->rowmap->range;
4764   len_s  = merge->len_s;
4765 
4766   /* send and recv matrix values */
4767   /*-----------------------------*/
4768   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4769   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4770 
4771   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4772   for (proc=0,k=0; proc<size; proc++) {
4773     if (!len_s[proc]) continue;
4774     i    = owners[proc];
4775     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4776     k++;
4777   }
4778 
4779   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4780   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4781   ierr = PetscFree(status);CHKERRQ(ierr);
4782 
4783   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4784   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4785 
4786   /* insert mat values of mpimat */
4787   /*----------------------------*/
4788   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4789   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4790 
4791   for (k=0; k<merge->nrecv; k++) {
4792     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4793     nrows       = *(buf_ri_k[k]);
4794     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4795     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4796   }
4797 
4798   /* set values of ba */
4799   m = merge->rowmap->n;
4800   for (i=0; i<m; i++) {
4801     arow = owners[rank] + i;
4802     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4803     bnzi = bi[i+1] - bi[i];
4804     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4805 
4806     /* add local non-zero vals of this proc's seqmat into ba */
4807     anzi   = ai[arow+1] - ai[arow];
4808     aj     = a->j + ai[arow];
4809     aa     = a->a + ai[arow];
4810     nextaj = 0;
4811     for (j=0; nextaj<anzi; j++) {
4812       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4813         ba_i[j] += aa[nextaj++];
4814       }
4815     }
4816 
4817     /* add received vals into ba */
4818     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4819       /* i-th row */
4820       if (i == *nextrow[k]) {
4821         anzi   = *(nextai[k]+1) - *nextai[k];
4822         aj     = buf_rj[k] + *(nextai[k]);
4823         aa     = abuf_r[k] + *(nextai[k]);
4824         nextaj = 0;
4825         for (j=0; nextaj<anzi; j++) {
4826           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4827             ba_i[j] += aa[nextaj++];
4828           }
4829         }
4830         nextrow[k]++; nextai[k]++;
4831       }
4832     }
4833     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4834   }
4835   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4836   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4837 
4838   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4839   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4840   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4841   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4842   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4843   PetscFunctionReturn(0);
4844 }
4845 
4846 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4847 
4848 #undef __FUNCT__
4849 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4850 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4851 {
4852   PetscErrorCode      ierr;
4853   Mat                 B_mpi;
4854   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4855   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4856   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4857   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4858   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4859   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4860   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4861   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4862   MPI_Status          *status;
4863   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4864   PetscBT             lnkbt;
4865   Mat_Merge_SeqsToMPI *merge;
4866   PetscContainer      container;
4867 
4868   PetscFunctionBegin;
4869   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4870 
4871   /* make sure it is a PETSc comm */
4872   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4875 
4876   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4877   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4878 
4879   /* determine row ownership */
4880   /*---------------------------------------------------------*/
4881   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4882   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4883   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4884   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4885   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4886   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4887   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4888 
4889   m      = merge->rowmap->n;
4890   owners = merge->rowmap->range;
4891 
4892   /* determine the number of messages to send, their lengths */
4893   /*---------------------------------------------------------*/
4894   len_s = merge->len_s;
4895 
4896   len          = 0; /* length of buf_si[] */
4897   merge->nsend = 0;
4898   for (proc=0; proc<size; proc++) {
4899     len_si[proc] = 0;
4900     if (proc == rank) {
4901       len_s[proc] = 0;
4902     } else {
4903       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4904       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4905     }
4906     if (len_s[proc]) {
4907       merge->nsend++;
4908       nrows = 0;
4909       for (i=owners[proc]; i<owners[proc+1]; i++) {
4910         if (ai[i+1] > ai[i]) nrows++;
4911       }
4912       len_si[proc] = 2*(nrows+1);
4913       len         += len_si[proc];
4914     }
4915   }
4916 
4917   /* determine the number and length of messages to receive for ij-structure */
4918   /*-------------------------------------------------------------------------*/
4919   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4920   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4921 
4922   /* post the Irecv of j-structure */
4923   /*-------------------------------*/
4924   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4925   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4926 
4927   /* post the Isend of j-structure */
4928   /*--------------------------------*/
4929   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4930 
4931   for (proc=0, k=0; proc<size; proc++) {
4932     if (!len_s[proc]) continue;
4933     i    = owners[proc];
4934     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4935     k++;
4936   }
4937 
4938   /* receives and sends of j-structure are complete */
4939   /*------------------------------------------------*/
4940   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4941   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4942 
4943   /* send and recv i-structure */
4944   /*---------------------------*/
4945   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4946   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4947 
4948   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4949   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4950   for (proc=0,k=0; proc<size; proc++) {
4951     if (!len_s[proc]) continue;
4952     /* form outgoing message for i-structure:
4953          buf_si[0]:                 nrows to be sent
4954                [1:nrows]:           row index (global)
4955                [nrows+1:2*nrows+1]: i-structure index
4956     */
4957     /*-------------------------------------------*/
4958     nrows       = len_si[proc]/2 - 1;
4959     buf_si_i    = buf_si + nrows+1;
4960     buf_si[0]   = nrows;
4961     buf_si_i[0] = 0;
4962     nrows       = 0;
4963     for (i=owners[proc]; i<owners[proc+1]; i++) {
4964       anzi = ai[i+1] - ai[i];
4965       if (anzi) {
4966         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4967         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4968         nrows++;
4969       }
4970     }
4971     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4972     k++;
4973     buf_si += len_si[proc];
4974   }
4975 
4976   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4977   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4978 
4979   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4980   for (i=0; i<merge->nrecv; i++) {
4981     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4982   }
4983 
4984   ierr = PetscFree(len_si);CHKERRQ(ierr);
4985   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4986   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4987   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4988   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4989   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4990   ierr = PetscFree(status);CHKERRQ(ierr);
4991 
4992   /* compute a local seq matrix in each processor */
4993   /*----------------------------------------------*/
4994   /* allocate bi array and free space for accumulating nonzero column info */
4995   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4996   bi[0] = 0;
4997 
4998   /* create and initialize a linked list */
4999   nlnk = N+1;
5000   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5001 
5002   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5003   len  = ai[owners[rank+1]] - ai[owners[rank]];
5004   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5005 
5006   current_space = free_space;
5007 
5008   /* determine symbolic info for each local row */
5009   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5010 
5011   for (k=0; k<merge->nrecv; k++) {
5012     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5013     nrows       = *buf_ri_k[k];
5014     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5015     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5016   }
5017 
5018   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5019   len  = 0;
5020   for (i=0; i<m; i++) {
5021     bnzi = 0;
5022     /* add local non-zero cols of this proc's seqmat into lnk */
5023     arow  = owners[rank] + i;
5024     anzi  = ai[arow+1] - ai[arow];
5025     aj    = a->j + ai[arow];
5026     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5027     bnzi += nlnk;
5028     /* add received col data into lnk */
5029     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5030       if (i == *nextrow[k]) { /* i-th row */
5031         anzi  = *(nextai[k]+1) - *nextai[k];
5032         aj    = buf_rj[k] + *nextai[k];
5033         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5034         bnzi += nlnk;
5035         nextrow[k]++; nextai[k]++;
5036       }
5037     }
5038     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5039 
5040     /* if free space is not available, make more free space */
5041     if (current_space->local_remaining<bnzi) {
5042       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5043       nspacedouble++;
5044     }
5045     /* copy data into free space, then initialize lnk */
5046     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5047     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5048 
5049     current_space->array           += bnzi;
5050     current_space->local_used      += bnzi;
5051     current_space->local_remaining -= bnzi;
5052 
5053     bi[i+1] = bi[i] + bnzi;
5054   }
5055 
5056   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5057 
5058   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5059   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5060   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5061 
5062   /* create symbolic parallel matrix B_mpi */
5063   /*---------------------------------------*/
5064   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5065   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5066   if (n==PETSC_DECIDE) {
5067     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5068   } else {
5069     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5070   }
5071   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5072   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5073   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5074   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5075   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5076 
5077   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5078   B_mpi->assembled    = PETSC_FALSE;
5079   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5080   merge->bi           = bi;
5081   merge->bj           = bj;
5082   merge->buf_ri       = buf_ri;
5083   merge->buf_rj       = buf_rj;
5084   merge->coi          = NULL;
5085   merge->coj          = NULL;
5086   merge->owners_co    = NULL;
5087 
5088   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5089 
5090   /* attach the supporting struct to B_mpi for reuse */
5091   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5092   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5093   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5094   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5095   *mpimat = B_mpi;
5096 
5097   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5098   PetscFunctionReturn(0);
5099 }
5100 
5101 #undef __FUNCT__
5102 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5103 /*@C
5104       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5105                  matrices from each processor
5106 
5107     Collective on MPI_Comm
5108 
5109    Input Parameters:
5110 +    comm - the communicators the parallel matrix will live on
5111 .    seqmat - the input sequential matrices
5112 .    m - number of local rows (or PETSC_DECIDE)
5113 .    n - number of local columns (or PETSC_DECIDE)
5114 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5115 
5116    Output Parameter:
5117 .    mpimat - the parallel matrix generated
5118 
5119     Level: advanced
5120 
5121    Notes:
5122      The dimensions of the sequential matrix in each processor MUST be the same.
5123      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5124      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5125 @*/
5126 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5127 {
5128   PetscErrorCode ierr;
5129   PetscMPIInt    size;
5130 
5131   PetscFunctionBegin;
5132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5133   if (size == 1) {
5134     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5135     if (scall == MAT_INITIAL_MATRIX) {
5136       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5137     } else {
5138       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5139     }
5140     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5141     PetscFunctionReturn(0);
5142   }
5143   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5144   if (scall == MAT_INITIAL_MATRIX) {
5145     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5146   }
5147   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5148   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5149   PetscFunctionReturn(0);
5150 }
5151 
5152 #undef __FUNCT__
5153 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5154 /*@
5155      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5156           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5157           with MatGetSize()
5158 
5159     Not Collective
5160 
5161    Input Parameters:
5162 +    A - the matrix
5163 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5164 
5165    Output Parameter:
5166 .    A_loc - the local sequential matrix generated
5167 
5168     Level: developer
5169 
5170 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5171 
5172 @*/
5173 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5174 {
5175   PetscErrorCode ierr;
5176   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5177   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5178   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5179   MatScalar      *aa=a->a,*ba=b->a,*cam;
5180   PetscScalar    *ca;
5181   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5182   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5183   PetscBool      match;
5184 
5185   PetscFunctionBegin;
5186   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5187   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5188   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5189   if (scall == MAT_INITIAL_MATRIX) {
5190     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5191     ci[0] = 0;
5192     for (i=0; i<am; i++) {
5193       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5194     }
5195     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5196     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5197     k    = 0;
5198     for (i=0; i<am; i++) {
5199       ncols_o = bi[i+1] - bi[i];
5200       ncols_d = ai[i+1] - ai[i];
5201       /* off-diagonal portion of A */
5202       for (jo=0; jo<ncols_o; jo++) {
5203         col = cmap[*bj];
5204         if (col >= cstart) break;
5205         cj[k]   = col; bj++;
5206         ca[k++] = *ba++;
5207       }
5208       /* diagonal portion of A */
5209       for (j=0; j<ncols_d; j++) {
5210         cj[k]   = cstart + *aj++;
5211         ca[k++] = *aa++;
5212       }
5213       /* off-diagonal portion of A */
5214       for (j=jo; j<ncols_o; j++) {
5215         cj[k]   = cmap[*bj++];
5216         ca[k++] = *ba++;
5217       }
5218     }
5219     /* put together the new matrix */
5220     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5221     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5222     /* Since these are PETSc arrays, change flags to free them as necessary. */
5223     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5224     mat->free_a  = PETSC_TRUE;
5225     mat->free_ij = PETSC_TRUE;
5226     mat->nonew   = 0;
5227   } else if (scall == MAT_REUSE_MATRIX) {
5228     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5229     ci = mat->i; cj = mat->j; cam = mat->a;
5230     for (i=0; i<am; i++) {
5231       /* off-diagonal portion of A */
5232       ncols_o = bi[i+1] - bi[i];
5233       for (jo=0; jo<ncols_o; jo++) {
5234         col = cmap[*bj];
5235         if (col >= cstart) break;
5236         *cam++ = *ba++; bj++;
5237       }
5238       /* diagonal portion of A */
5239       ncols_d = ai[i+1] - ai[i];
5240       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5241       /* off-diagonal portion of A */
5242       for (j=jo; j<ncols_o; j++) {
5243         *cam++ = *ba++; bj++;
5244       }
5245     }
5246   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5247   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5248   PetscFunctionReturn(0);
5249 }
5250 
5251 #undef __FUNCT__
5252 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5253 /*@C
5254      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5255 
5256     Not Collective
5257 
5258    Input Parameters:
5259 +    A - the matrix
5260 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5261 -    row, col - index sets of rows and columns to extract (or NULL)
5262 
5263    Output Parameter:
5264 .    A_loc - the local sequential matrix generated
5265 
5266     Level: developer
5267 
5268 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5269 
5270 @*/
5271 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5272 {
5273   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5274   PetscErrorCode ierr;
5275   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5276   IS             isrowa,iscola;
5277   Mat            *aloc;
5278   PetscBool      match;
5279 
5280   PetscFunctionBegin;
5281   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5282   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5283   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5284   if (!row) {
5285     start = A->rmap->rstart; end = A->rmap->rend;
5286     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5287   } else {
5288     isrowa = *row;
5289   }
5290   if (!col) {
5291     start = A->cmap->rstart;
5292     cmap  = a->garray;
5293     nzA   = a->A->cmap->n;
5294     nzB   = a->B->cmap->n;
5295     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5296     ncols = 0;
5297     for (i=0; i<nzB; i++) {
5298       if (cmap[i] < start) idx[ncols++] = cmap[i];
5299       else break;
5300     }
5301     imark = i;
5302     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5303     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5304     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5305   } else {
5306     iscola = *col;
5307   }
5308   if (scall != MAT_INITIAL_MATRIX) {
5309     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5310     aloc[0] = *A_loc;
5311   }
5312   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5313   *A_loc = aloc[0];
5314   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5315   if (!row) {
5316     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5317   }
5318   if (!col) {
5319     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5320   }
5321   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5322   PetscFunctionReturn(0);
5323 }
5324 
5325 #undef __FUNCT__
5326 #define __FUNCT__ "MatGetBrowsOfAcols"
5327 /*@C
5328     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5329 
5330     Collective on Mat
5331 
5332    Input Parameters:
5333 +    A,B - the matrices in mpiaij format
5334 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5335 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5336 
5337    Output Parameter:
5338 +    rowb, colb - index sets of rows and columns of B to extract
5339 -    B_seq - the sequential matrix generated
5340 
5341     Level: developer
5342 
5343 @*/
5344 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5345 {
5346   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5347   PetscErrorCode ierr;
5348   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5349   IS             isrowb,iscolb;
5350   Mat            *bseq=NULL;
5351 
5352   PetscFunctionBegin;
5353   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5354     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5355   }
5356   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5357 
5358   if (scall == MAT_INITIAL_MATRIX) {
5359     start = A->cmap->rstart;
5360     cmap  = a->garray;
5361     nzA   = a->A->cmap->n;
5362     nzB   = a->B->cmap->n;
5363     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5364     ncols = 0;
5365     for (i=0; i<nzB; i++) {  /* row < local row index */
5366       if (cmap[i] < start) idx[ncols++] = cmap[i];
5367       else break;
5368     }
5369     imark = i;
5370     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5371     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5372     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5373     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5374   } else {
5375     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5376     isrowb  = *rowb; iscolb = *colb;
5377     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5378     bseq[0] = *B_seq;
5379   }
5380   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5381   *B_seq = bseq[0];
5382   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5383   if (!rowb) {
5384     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5385   } else {
5386     *rowb = isrowb;
5387   }
5388   if (!colb) {
5389     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5390   } else {
5391     *colb = iscolb;
5392   }
5393   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5394   PetscFunctionReturn(0);
5395 }
5396 
5397 #undef __FUNCT__
5398 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5399 /*
5400     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5401     of the OFF-DIAGONAL portion of local A
5402 
5403     Collective on Mat
5404 
5405    Input Parameters:
5406 +    A,B - the matrices in mpiaij format
5407 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5408 
5409    Output Parameter:
5410 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5411 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5412 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5413 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5414 
5415     Level: developer
5416 
5417 */
5418 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5419 {
5420   VecScatter_MPI_General *gen_to,*gen_from;
5421   PetscErrorCode         ierr;
5422   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5423   Mat_SeqAIJ             *b_oth;
5424   VecScatter             ctx =a->Mvctx;
5425   MPI_Comm               comm;
5426   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5427   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5428   PetscScalar            *rvalues,*svalues;
5429   MatScalar              *b_otha,*bufa,*bufA;
5430   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5431   MPI_Request            *rwaits = NULL,*swaits = NULL;
5432   MPI_Status             *sstatus,rstatus;
5433   PetscMPIInt            jj;
5434   PetscInt               *cols,sbs,rbs;
5435   PetscScalar            *vals;
5436 
5437   PetscFunctionBegin;
5438   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5439   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5440     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5441   }
5442   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5443   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5444 
5445   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5446   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5447   rvalues  = gen_from->values; /* holds the length of receiving row */
5448   svalues  = gen_to->values;   /* holds the length of sending row */
5449   nrecvs   = gen_from->n;
5450   nsends   = gen_to->n;
5451 
5452   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5453   srow    = gen_to->indices;    /* local row index to be sent */
5454   sstarts = gen_to->starts;
5455   sprocs  = gen_to->procs;
5456   sstatus = gen_to->sstatus;
5457   sbs     = gen_to->bs;
5458   rstarts = gen_from->starts;
5459   rprocs  = gen_from->procs;
5460   rbs     = gen_from->bs;
5461 
5462   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5463   if (scall == MAT_INITIAL_MATRIX) {
5464     /* i-array */
5465     /*---------*/
5466     /*  post receives */
5467     for (i=0; i<nrecvs; i++) {
5468       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5469       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5470       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5471     }
5472 
5473     /* pack the outgoing message */
5474     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5475 
5476     sstartsj[0] = 0;
5477     rstartsj[0] = 0;
5478     len         = 0; /* total length of j or a array to be sent */
5479     k           = 0;
5480     for (i=0; i<nsends; i++) {
5481       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5482       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5483       for (j=0; j<nrows; j++) {
5484         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5485         for (l=0; l<sbs; l++) {
5486           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5487 
5488           rowlen[j*sbs+l] = ncols;
5489 
5490           len += ncols;
5491           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5492         }
5493         k++;
5494       }
5495       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5496 
5497       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5498     }
5499     /* recvs and sends of i-array are completed */
5500     i = nrecvs;
5501     while (i--) {
5502       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5503     }
5504     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5505 
5506     /* allocate buffers for sending j and a arrays */
5507     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5508     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5509 
5510     /* create i-array of B_oth */
5511     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5512 
5513     b_othi[0] = 0;
5514     len       = 0; /* total length of j or a array to be received */
5515     k         = 0;
5516     for (i=0; i<nrecvs; i++) {
5517       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5518       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5519       for (j=0; j<nrows; j++) {
5520         b_othi[k+1] = b_othi[k] + rowlen[j];
5521         len        += rowlen[j]; k++;
5522       }
5523       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5524     }
5525 
5526     /* allocate space for j and a arrrays of B_oth */
5527     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5528     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5529 
5530     /* j-array */
5531     /*---------*/
5532     /*  post receives of j-array */
5533     for (i=0; i<nrecvs; i++) {
5534       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5535       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5536     }
5537 
5538     /* pack the outgoing message j-array */
5539     k = 0;
5540     for (i=0; i<nsends; i++) {
5541       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5542       bufJ  = bufj+sstartsj[i];
5543       for (j=0; j<nrows; j++) {
5544         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5545         for (ll=0; ll<sbs; ll++) {
5546           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5547           for (l=0; l<ncols; l++) {
5548             *bufJ++ = cols[l];
5549           }
5550           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5551         }
5552       }
5553       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5554     }
5555 
5556     /* recvs and sends of j-array are completed */
5557     i = nrecvs;
5558     while (i--) {
5559       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5560     }
5561     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5562   } else if (scall == MAT_REUSE_MATRIX) {
5563     sstartsj = *startsj_s;
5564     rstartsj = *startsj_r;
5565     bufa     = *bufa_ptr;
5566     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5567     b_otha   = b_oth->a;
5568   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5569 
5570   /* a-array */
5571   /*---------*/
5572   /*  post receives of a-array */
5573   for (i=0; i<nrecvs; i++) {
5574     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5575     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5576   }
5577 
5578   /* pack the outgoing message a-array */
5579   k = 0;
5580   for (i=0; i<nsends; i++) {
5581     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5582     bufA  = bufa+sstartsj[i];
5583     for (j=0; j<nrows; j++) {
5584       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5585       for (ll=0; ll<sbs; ll++) {
5586         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5587         for (l=0; l<ncols; l++) {
5588           *bufA++ = vals[l];
5589         }
5590         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5591       }
5592     }
5593     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5594   }
5595   /* recvs and sends of a-array are completed */
5596   i = nrecvs;
5597   while (i--) {
5598     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5599   }
5600   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5601   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5602 
5603   if (scall == MAT_INITIAL_MATRIX) {
5604     /* put together the new matrix */
5605     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5606 
5607     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5608     /* Since these are PETSc arrays, change flags to free them as necessary. */
5609     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5610     b_oth->free_a  = PETSC_TRUE;
5611     b_oth->free_ij = PETSC_TRUE;
5612     b_oth->nonew   = 0;
5613 
5614     ierr = PetscFree(bufj);CHKERRQ(ierr);
5615     if (!startsj_s || !bufa_ptr) {
5616       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5617       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5618     } else {
5619       *startsj_s = sstartsj;
5620       *startsj_r = rstartsj;
5621       *bufa_ptr  = bufa;
5622     }
5623   }
5624   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5625   PetscFunctionReturn(0);
5626 }
5627 
5628 #undef __FUNCT__
5629 #define __FUNCT__ "MatGetCommunicationStructs"
5630 /*@C
5631   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5632 
5633   Not Collective
5634 
5635   Input Parameters:
5636 . A - The matrix in mpiaij format
5637 
5638   Output Parameter:
5639 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5640 . colmap - A map from global column index to local index into lvec
5641 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5642 
5643   Level: developer
5644 
5645 @*/
5646 #if defined(PETSC_USE_CTABLE)
5647 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5648 #else
5649 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5650 #endif
5651 {
5652   Mat_MPIAIJ *a;
5653 
5654   PetscFunctionBegin;
5655   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5656   PetscValidPointer(lvec, 2);
5657   PetscValidPointer(colmap, 3);
5658   PetscValidPointer(multScatter, 4);
5659   a = (Mat_MPIAIJ*) A->data;
5660   if (lvec) *lvec = a->lvec;
5661   if (colmap) *colmap = a->colmap;
5662   if (multScatter) *multScatter = a->Mvctx;
5663   PetscFunctionReturn(0);
5664 }
5665 
5666 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5667 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5668 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5669 
5670 #undef __FUNCT__
5671 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5672 /*
5673     Computes (B'*A')' since computing B*A directly is untenable
5674 
5675                n                       p                          p
5676         (              )       (              )         (                  )
5677       m (      A       )  *  n (       B      )   =   m (         C        )
5678         (              )       (              )         (                  )
5679 
5680 */
5681 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5682 {
5683   PetscErrorCode ierr;
5684   Mat            At,Bt,Ct;
5685 
5686   PetscFunctionBegin;
5687   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5688   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5689   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5690   ierr = MatDestroy(&At);CHKERRQ(ierr);
5691   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5692   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5693   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5694   PetscFunctionReturn(0);
5695 }
5696 
5697 #undef __FUNCT__
5698 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5699 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5700 {
5701   PetscErrorCode ierr;
5702   PetscInt       m=A->rmap->n,n=B->cmap->n;
5703   Mat            Cmat;
5704 
5705   PetscFunctionBegin;
5706   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5707   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5708   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5709   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5710   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5711   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5712   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5713   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5714 
5715   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5716 
5717   *C = Cmat;
5718   PetscFunctionReturn(0);
5719 }
5720 
5721 /* ----------------------------------------------------------------*/
5722 #undef __FUNCT__
5723 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5724 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5725 {
5726   PetscErrorCode ierr;
5727 
5728   PetscFunctionBegin;
5729   if (scall == MAT_INITIAL_MATRIX) {
5730     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5731     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5732     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5733   }
5734   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5735   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5736   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5737   PetscFunctionReturn(0);
5738 }
5739 
5740 #if defined(PETSC_HAVE_MUMPS)
5741 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5742 #endif
5743 #if defined(PETSC_HAVE_PASTIX)
5744 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5745 #endif
5746 #if defined(PETSC_HAVE_SUPERLU_DIST)
5747 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5748 #endif
5749 #if defined(PETSC_HAVE_CLIQUE)
5750 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5751 #endif
5752 
5753 /*MC
5754    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5755 
5756    Options Database Keys:
5757 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5758 
5759   Level: beginner
5760 
5761 .seealso: MatCreateAIJ()
5762 M*/
5763 
5764 #undef __FUNCT__
5765 #define __FUNCT__ "MatCreate_MPIAIJ"
5766 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5767 {
5768   Mat_MPIAIJ     *b;
5769   PetscErrorCode ierr;
5770   PetscMPIInt    size;
5771 
5772   PetscFunctionBegin;
5773   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5774 
5775   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5776   B->data       = (void*)b;
5777   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5778   B->assembled  = PETSC_FALSE;
5779   B->insertmode = NOT_SET_VALUES;
5780   b->size       = size;
5781 
5782   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5783 
5784   /* build cache for off array entries formed */
5785   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5786 
5787   b->donotstash  = PETSC_FALSE;
5788   b->colmap      = 0;
5789   b->garray      = 0;
5790   b->roworiented = PETSC_TRUE;
5791 
5792   /* stuff used for matrix vector multiply */
5793   b->lvec  = NULL;
5794   b->Mvctx = NULL;
5795 
5796   /* stuff for MatGetRow() */
5797   b->rowindices   = 0;
5798   b->rowvalues    = 0;
5799   b->getrowactive = PETSC_FALSE;
5800 
5801   /* flexible pointer used in CUSP/CUSPARSE classes */
5802   b->spptr = NULL;
5803 
5804 #if defined(PETSC_HAVE_MUMPS)
5805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5806 #endif
5807 #if defined(PETSC_HAVE_PASTIX)
5808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5809 #endif
5810 #if defined(PETSC_HAVE_SUPERLU_DIST)
5811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5812 #endif
5813 #if defined(PETSC_HAVE_CLIQUE)
5814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5815 #endif
5816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5818   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5823   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5825   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5826   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5828   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5829   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5830   PetscFunctionReturn(0);
5831 }
5832 
5833 #undef __FUNCT__
5834 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5835 /*@
5836      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5837          and "off-diagonal" part of the matrix in CSR format.
5838 
5839    Collective on MPI_Comm
5840 
5841    Input Parameters:
5842 +  comm - MPI communicator
5843 .  m - number of local rows (Cannot be PETSC_DECIDE)
5844 .  n - This value should be the same as the local size used in creating the
5845        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5846        calculated if N is given) For square matrices n is almost always m.
5847 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5848 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5849 .   i - row indices for "diagonal" portion of matrix
5850 .   j - column indices
5851 .   a - matrix values
5852 .   oi - row indices for "off-diagonal" portion of matrix
5853 .   oj - column indices
5854 -   oa - matrix values
5855 
5856    Output Parameter:
5857 .   mat - the matrix
5858 
5859    Level: advanced
5860 
5861    Notes:
5862        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5863        must free the arrays once the matrix has been destroyed and not before.
5864 
5865        The i and j indices are 0 based
5866 
5867        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5868 
5869        This sets local rows and cannot be used to set off-processor values.
5870 
5871        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5872        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5873        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5874        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5875        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5876        communication if it is known that only local entries will be set.
5877 
5878 .keywords: matrix, aij, compressed row, sparse, parallel
5879 
5880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5881           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5882 @*/
5883 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5884 {
5885   PetscErrorCode ierr;
5886   Mat_MPIAIJ     *maij;
5887 
5888   PetscFunctionBegin;
5889   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5890   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5891   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5892   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5893   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5894   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5895   maij = (Mat_MPIAIJ*) (*mat)->data;
5896 
5897   (*mat)->preallocated = PETSC_TRUE;
5898 
5899   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5900   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5901 
5902   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5903   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5904 
5905   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5906   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5907   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5908   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5909 
5910   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5911   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5912   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5913   PetscFunctionReturn(0);
5914 }
5915 
5916 /*
5917     Special version for direct calls from Fortran
5918 */
5919 #include <petsc-private/fortranimpl.h>
5920 
5921 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5922 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5923 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5924 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5925 #endif
5926 
5927 /* Change these macros so can be used in void function */
5928 #undef CHKERRQ
5929 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5930 #undef SETERRQ2
5931 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5932 #undef SETERRQ3
5933 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5934 #undef SETERRQ
5935 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5936 
5937 #undef __FUNCT__
5938 #define __FUNCT__ "matsetvaluesmpiaij_"
5939 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5940 {
5941   Mat            mat  = *mmat;
5942   PetscInt       m    = *mm, n = *mn;
5943   InsertMode     addv = *maddv;
5944   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5945   PetscScalar    value;
5946   PetscErrorCode ierr;
5947 
5948   MatCheckPreallocated(mat,1);
5949   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5950 
5951 #if defined(PETSC_USE_DEBUG)
5952   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5953 #endif
5954   {
5955     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5956     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5957     PetscBool roworiented = aij->roworiented;
5958 
5959     /* Some Variables required in the macro */
5960     Mat        A                 = aij->A;
5961     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5962     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5963     MatScalar  *aa               = a->a;
5964     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5965     Mat        B                 = aij->B;
5966     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5967     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5968     MatScalar  *ba               = b->a;
5969 
5970     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5971     PetscInt  nonew = a->nonew;
5972     MatScalar *ap1,*ap2;
5973 
5974     PetscFunctionBegin;
5975     for (i=0; i<m; i++) {
5976       if (im[i] < 0) continue;
5977 #if defined(PETSC_USE_DEBUG)
5978       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5979 #endif
5980       if (im[i] >= rstart && im[i] < rend) {
5981         row      = im[i] - rstart;
5982         lastcol1 = -1;
5983         rp1      = aj + ai[row];
5984         ap1      = aa + ai[row];
5985         rmax1    = aimax[row];
5986         nrow1    = ailen[row];
5987         low1     = 0;
5988         high1    = nrow1;
5989         lastcol2 = -1;
5990         rp2      = bj + bi[row];
5991         ap2      = ba + bi[row];
5992         rmax2    = bimax[row];
5993         nrow2    = bilen[row];
5994         low2     = 0;
5995         high2    = nrow2;
5996 
5997         for (j=0; j<n; j++) {
5998           if (roworiented) value = v[i*n+j];
5999           else value = v[i+j*m];
6000           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6001           if (in[j] >= cstart && in[j] < cend) {
6002             col = in[j] - cstart;
6003             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6004           } else if (in[j] < 0) continue;
6005 #if defined(PETSC_USE_DEBUG)
6006           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6007 #endif
6008           else {
6009             if (mat->was_assembled) {
6010               if (!aij->colmap) {
6011                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6012               }
6013 #if defined(PETSC_USE_CTABLE)
6014               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6015               col--;
6016 #else
6017               col = aij->colmap[in[j]] - 1;
6018 #endif
6019               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6020                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6021                 col  =  in[j];
6022                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6023                 B     = aij->B;
6024                 b     = (Mat_SeqAIJ*)B->data;
6025                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6026                 rp2   = bj + bi[row];
6027                 ap2   = ba + bi[row];
6028                 rmax2 = bimax[row];
6029                 nrow2 = bilen[row];
6030                 low2  = 0;
6031                 high2 = nrow2;
6032                 bm    = aij->B->rmap->n;
6033                 ba    = b->a;
6034               }
6035             } else col = in[j];
6036             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6037           }
6038         }
6039       } else if (!aij->donotstash) {
6040         if (roworiented) {
6041           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6042         } else {
6043           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6044         }
6045       }
6046     }
6047   }
6048   PetscFunctionReturnVoid();
6049 }
6050 
6051