xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3c79b8e773c7db238ae3ab10383e74801ec22290)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537 
2538     if (redund->psubcomm) {
2539       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2540     }
2541 
2542     ierr = redund->Destroy(A);CHKERRQ(ierr);
2543     ierr = PetscFree(redund);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2551 {
2552   PetscMPIInt    rank,size;
2553   MPI_Comm       comm,subcomm=psubcomm->comm;
2554   PetscErrorCode ierr;
2555   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2556   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2557   PetscInt       *rowrange = mat->rmap->range;
2558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2559   Mat            A = aij->A,B=aij->B,C=*matredundant;
2560   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2561   PetscScalar    *sbuf_a;
2562   PetscInt       nzlocal=a->nz+b->nz;
2563   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2564   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2565   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2566   MatScalar      *aworkA,*aworkB;
2567   PetscScalar    *vals;
2568   PetscMPIInt    tag1,tag2,tag3,imdex;
2569   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2570   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2571   MPI_Status     recv_status,*send_status;
2572   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2573   PetscInt       **rbuf_j=NULL;
2574   PetscScalar    **rbuf_a=NULL;
2575   Mat_Redundant  *redund =NULL;
2576   PetscBool      flg=PETSC_FALSE;
2577 
2578   PetscFunctionBegin;
2579   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2580   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2582   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2583   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2584 
2585   /* ---------- new imples: use MatGetSubMatrices() ------------*/
2586   ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2587   if (flg) {
2588     Mat        *matseq;
2589     IS         isrow,iscol;
2590     PetscInt   mloc_sub,rstart,rend;
2591 
2592     if (reuse == MAT_INITIAL_MATRIX) {
2593       /* create a local sequential matrix matseq[0] */
2594       ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2595       ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2596       ierr = MatSetUp(C);CHKERRQ(ierr);
2597       ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2598       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2599       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2600 
2601       ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
2602       ierr = MatDestroy(&C);CHKERRQ(ierr);
2603       mloc_sub = rend - rstart;
2604 
2605       printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub);
2606       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2607       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2608     } else { /* reuse == MAT_REUSE_MATRIX */
2609       if (subsize == 1) {
2610         Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2611         redund = c->redundant;
2612       } else {
2613         Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2614         redund = c->redundant;
2615       }
2616 
2617       isrow = redund->isrow;
2618       iscol = redund->iscol;
2619       matseq = redund->matseq;
2620     }
2621 
2622     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2623     if (rank==0) {
2624       printf("[%d] matsub:\n",rank);
2625       ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2626     }
2627     ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2628 
2629     /* Create matredundant by concatenating matseq[0] from processors in this subcomm */
2630     if (reuse == MAT_REUSE_MATRIX) {
2631       if (!rank) printf("matredundant:\n");
2632       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2633     }
2634     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2635 
2636     if (nsubcomm == 1) {
2637       if (!rank) printf( "matredundant\n");
2638       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2639     }
2640     /*
2641     ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr);
2642     ierr = PetscFree(matseq);CHKERRQ(ierr);
2643     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
2644     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
2645      */
2646 
2647     if (reuse == MAT_INITIAL_MATRIX) {
2648       /* create a supporting struct and attach it to C for reuse */
2649       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2650       if (subsize == 1) {
2651         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2652         c->redundant = redund;
2653       } else {
2654         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2655         c->redundant = redund;
2656       }
2657 
2658       redund->isrow = isrow;
2659       redund->iscol = iscol;
2660       redund->matseq = matseq;
2661       redund->psubcomm  = NULL;
2662 
2663       redund->Destroy               = (*matredundant)->ops->destroy;
2664       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2665     }
2666     PetscFunctionReturn(0);
2667   }
2668   /* ----------------------------------------------------*/
2669 
2670   if (reuse == MAT_REUSE_MATRIX) {
2671     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2672     if (subsize == 1) {
2673       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2674       redund = c->redundant;
2675     } else {
2676       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2677       redund = c->redundant;
2678     }
2679     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2680 
2681     nsends    = redund->nsends;
2682     nrecvs    = redund->nrecvs;
2683     send_rank = redund->send_rank;
2684     recv_rank = redund->recv_rank;
2685     sbuf_nz   = redund->sbuf_nz;
2686     rbuf_nz   = redund->rbuf_nz;
2687     sbuf_j    = redund->sbuf_j;
2688     sbuf_a    = redund->sbuf_a;
2689     rbuf_j    = redund->rbuf_j;
2690     rbuf_a    = redund->rbuf_a;
2691   }
2692 
2693   if (reuse == MAT_INITIAL_MATRIX) {
2694     PetscInt    nleftover,np_subcomm;
2695 
2696     /* get the destination processors' id send_rank, nsends and nrecvs */
2697     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2698 
2699     np_subcomm = size/nsubcomm;
2700     nleftover  = size - nsubcomm*np_subcomm;
2701 
2702     nsends = 0; nrecvs = 0;
2703     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2704       /* -------------------------------------------*/
2705       for (i=0; i<size; i++) {
2706         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2707           send_rank[nsends]   = i; nsends++;
2708           recv_rank[nrecvs++] = i;
2709           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2710         }
2711       }
2712       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2713         i = size-nleftover-1;
2714         j = 0;
2715         while (j < nsubcomm - nleftover) {
2716           send_rank[nsends++] = i;
2717           i--; j++;
2718           /* printf("[%d] send to [%d]\n",rank,i); */
2719         }
2720       }
2721 
2722       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2723         for (i=0; i<nleftover; i++) {
2724           recv_rank[nrecvs++] = size-nleftover+i;
2725           /* printf("[%d] recv from [%d]\n",rank,i); */
2726         }
2727       }
2728     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2729       /* --------------------------------------------------*/
2730       /* ---------- new imples: use MatGetSubMatrices() ------------*/
2731       PetscBool flg=PETSC_FALSE;
2732       ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2733       if (flg) {
2734         Mat        *matseq;
2735         IS         isrow,iscol;
2736         PetscInt   mloc_sub,rstart,rend;
2737 
2738         /* create redundant matrix */
2739         ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2740         ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2741         ierr = MatSetUp(C);CHKERRQ(ierr);
2742         ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2743         ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744         ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745 
2746         ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
2747         mloc_sub = rend - rstart;
2748 
2749         printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub);
2750         ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2751         ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2752 
2753         ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&matseq);CHKERRQ(ierr);
2754         if (rank==1) {
2755           printf("[%d] matsub:\n",rank);
2756           ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2757         }
2758 
2759         /* Create redundant matrix by concatenating local sequential
2760                  matrices from processors in this subcomm */
2761         ierr = MatDestroy(&C);CHKERRQ(ierr);
2762 
2763         ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
2764         *matredundant = C;
2765 
2766         if (nsubcomm == 1) {
2767           if (!rank) printf( "C\n");
2768           ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2769         }
2770 
2771         ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr);
2772         ierr = PetscFree(matseq);CHKERRQ(ierr);
2773         ierr = ISDestroy(&isrow);CHKERRQ(ierr);
2774         ierr = ISDestroy(&iscol);CHKERRQ(ierr);
2775         //ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2776         //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No done yet");
2777         PetscFunctionReturn(0);
2778       }
2779 
2780       /*---------------------------------- */
2781       PetscInt color,subcommstart;
2782       subcommstart=0;
2783       for (color=0; color<nsubcomm; color++) {
2784         if (psubcomm->color != color) {
2785           for (i=0; i<psubcomm->subsize[color]; i++) {
2786             if (subrank == i) { /* my_subrank == other's subrank */
2787               send_rank[nsends++] = subcommstart+i;
2788               recv_rank[nrecvs++] = subcommstart+i;
2789               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2790             }
2791           }
2792         }
2793         subcommstart += psubcomm->subsize[color];
2794       }
2795       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2796         subcommstart=0;
2797         for (color=0; color<nsubcomm; color++) {
2798           subcommstart += psubcomm->subsize[color];
2799           if (psubcomm->color == color) continue;
2800           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2801             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2802             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2803           }
2804         }
2805       }
2806 
2807       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2808         subcommstart=0;
2809         for (color=0; color<nsubcomm; color++) {
2810           subcommstart += psubcomm->subsize[color];
2811           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2812             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2813             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2814           }
2815         }
2816       }
2817     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2818 
2819     /* allocate sbuf_j, sbuf_a */
2820     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2821     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2822     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2823     /*
2824     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2825     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2826      */
2827   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2828 
2829   /* copy mat's local entries into the buffers */
2830   if (reuse == MAT_INITIAL_MATRIX) {
2831     rownz_max = 0;
2832     rptr      = sbuf_j;
2833     cols      = sbuf_j + rend-rstart + 1;
2834     vals      = sbuf_a;
2835     rptr[0]   = 0;
2836     for (i=0; i<rend-rstart; i++) {
2837       row    = i + rstart;
2838       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2839       ncols  = nzA + nzB;
2840       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2841       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2842       /* load the column indices for this row into cols */
2843       lwrite = 0;
2844       for (l=0; l<nzB; l++) {
2845         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2846           vals[lwrite]   = aworkB[l];
2847           cols[lwrite++] = ctmp;
2848         }
2849       }
2850       for (l=0; l<nzA; l++) {
2851         vals[lwrite]   = aworkA[l];
2852         cols[lwrite++] = cstart + cworkA[l];
2853       }
2854       for (l=0; l<nzB; l++) {
2855         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2856           vals[lwrite]   = aworkB[l];
2857           cols[lwrite++] = ctmp;
2858         }
2859       }
2860       vals     += ncols;
2861       cols     += ncols;
2862       rptr[i+1] = rptr[i] + ncols;
2863       if (rownz_max < ncols) rownz_max = ncols;
2864     }
2865     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2866   } else { /* only copy matrix values into sbuf_a */
2867     rptr    = sbuf_j;
2868     vals    = sbuf_a;
2869     rptr[0] = 0;
2870     for (i=0; i<rend-rstart; i++) {
2871       row    = i + rstart;
2872       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2873       ncols  = nzA + nzB;
2874       cworkB = b->j + b->i[i];
2875       aworkA = a->a + a->i[i];
2876       aworkB = b->a + b->i[i];
2877       lwrite = 0;
2878       for (l=0; l<nzB; l++) {
2879         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2880       }
2881       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2882       for (l=0; l<nzB; l++) {
2883         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2884       }
2885       vals     += ncols;
2886       rptr[i+1] = rptr[i] + ncols;
2887     }
2888   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2889 
2890   /* send nzlocal to others, and recv other's nzlocal */
2891   /*--------------------------------------------------*/
2892   if (reuse == MAT_INITIAL_MATRIX) {
2893     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2894 
2895     s_waits2 = s_waits3 + nsends;
2896     s_waits1 = s_waits2 + nsends;
2897     r_waits1 = s_waits1 + nsends;
2898     r_waits2 = r_waits1 + nrecvs;
2899     r_waits3 = r_waits2 + nrecvs;
2900   } else {
2901     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2902 
2903     r_waits3 = s_waits3 + nsends;
2904   }
2905 
2906   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2907   if (reuse == MAT_INITIAL_MATRIX) {
2908     /* get new tags to keep the communication clean */
2909     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2910     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2911     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2912 
2913     /* post receives of other's nzlocal */
2914     for (i=0; i<nrecvs; i++) {
2915       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2916     }
2917     /* send nzlocal to others */
2918     for (i=0; i<nsends; i++) {
2919       sbuf_nz[i] = nzlocal;
2920       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2921     }
2922     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2923     count = nrecvs;
2924     while (count) {
2925       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2926 
2927       recv_rank[imdex] = recv_status.MPI_SOURCE;
2928       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2929       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2930 
2931       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2932 
2933       rbuf_nz[imdex] += i + 2;
2934 
2935       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2936       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2937       count--;
2938     }
2939     /* wait on sends of nzlocal */
2940     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2941     /* send mat->i,j to others, and recv from other's */
2942     /*------------------------------------------------*/
2943     for (i=0; i<nsends; i++) {
2944       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2945       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2946     }
2947     /* wait on receives of mat->i,j */
2948     /*------------------------------*/
2949     count = nrecvs;
2950     while (count) {
2951       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2952       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2953       count--;
2954     }
2955     /* wait on sends of mat->i,j */
2956     /*---------------------------*/
2957     if (nsends) {
2958       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2959     }
2960   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2961 
2962   /* post receives, send and receive mat->a */
2963   /*----------------------------------------*/
2964   for (imdex=0; imdex<nrecvs; imdex++) {
2965     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2966   }
2967   for (i=0; i<nsends; i++) {
2968     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2969   }
2970   count = nrecvs;
2971   while (count) {
2972     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2973     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2974     count--;
2975   }
2976   if (nsends) {
2977     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2978   }
2979 
2980   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2981 
2982   /* create redundant matrix */
2983   /*-------------------------*/
2984   if (reuse == MAT_INITIAL_MATRIX) {
2985     const PetscInt *range;
2986     PetscInt       rstart_sub,rend_sub,mloc_sub;
2987 
2988     /* compute rownz_max for preallocation */
2989     for (imdex=0; imdex<nrecvs; imdex++) {
2990       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2991       rptr = rbuf_j[imdex];
2992       for (i=0; i<j; i++) {
2993         ncols = rptr[i+1] - rptr[i];
2994         if (rownz_max < ncols) rownz_max = ncols;
2995       }
2996     }
2997 
2998     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2999 
3000     /* get local size of redundant matrix
3001        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
3002     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
3003     rstart_sub = range[nsubcomm*subrank];
3004     if (subrank+1 < subsize) { /* not the last proc in subcomm */
3005       rend_sub = range[nsubcomm*(subrank+1)];
3006     } else {
3007       rend_sub = mat->rmap->N;
3008     }
3009     mloc_sub = rend_sub - rstart_sub;
3010 
3011     if (M == N) {
3012       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3013     } else { /* non-square matrix */
3014       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
3015     }
3016     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
3017     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
3018     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
3019     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
3020   } else {
3021     C = *matredundant;
3022   }
3023 
3024   /* insert local matrix entries */
3025   rptr = sbuf_j;
3026   cols = sbuf_j + rend-rstart + 1;
3027   vals = sbuf_a;
3028   for (i=0; i<rend-rstart; i++) {
3029     row   = i + rstart;
3030     ncols = rptr[i+1] - rptr[i];
3031     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3032     vals += ncols;
3033     cols += ncols;
3034   }
3035   /* insert received matrix entries */
3036   for (imdex=0; imdex<nrecvs; imdex++) {
3037     rstart = rowrange[recv_rank[imdex]];
3038     rend   = rowrange[recv_rank[imdex]+1];
3039     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
3040     rptr   = rbuf_j[imdex];
3041     cols   = rbuf_j[imdex] + rend-rstart + 1;
3042     vals   = rbuf_a[imdex];
3043     for (i=0; i<rend-rstart; i++) {
3044       row   = i + rstart;
3045       ncols = rptr[i+1] - rptr[i];
3046       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3047       vals += ncols;
3048       cols += ncols;
3049     }
3050   }
3051   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053 
3054   if (reuse == MAT_INITIAL_MATRIX) {
3055     *matredundant = C;
3056 
3057     /* create a supporting struct and attach it to C for reuse */
3058     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3059     if (subsize == 1) {
3060       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
3061       c->redundant = redund;
3062     } else {
3063       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
3064       c->redundant = redund;
3065     }
3066 
3067     redund->nzlocal   = nzlocal;
3068     redund->nsends    = nsends;
3069     redund->nrecvs    = nrecvs;
3070     redund->send_rank = send_rank;
3071     redund->recv_rank = recv_rank;
3072     redund->sbuf_nz   = sbuf_nz;
3073     redund->rbuf_nz   = rbuf_nz;
3074     redund->sbuf_j    = sbuf_j;
3075     redund->sbuf_a    = sbuf_a;
3076     redund->rbuf_j    = rbuf_j;
3077     redund->rbuf_a    = rbuf_a;
3078     redund->psubcomm  = NULL;
3079 
3080     redund->Destroy = C->ops->destroy;
3081     C->ops->destroy = MatDestroy_MatRedundant;
3082   }
3083   PetscFunctionReturn(0);
3084 }
3085 
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
3088 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
3089 {
3090   PetscErrorCode ierr;
3091 
3092   PetscFunctionBegin;
3093   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
3094     MPI_Comm       comm;
3095     PetscSubcomm   psubcomm;
3096     PetscMPIInt    size,subsize;
3097 
3098     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3099     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3100     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
3101     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
3102     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
3103     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
3104 
3105     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3106 
3107     /* free psubcomm in MatDestroy_MatRedundant() */
3108     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3109     if (subsize == 1) {
3110       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3111       c->redundant->psubcomm = psubcomm;
3112     } else {
3113       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3114       c->redundant->psubcomm = psubcomm ;
3115     }
3116   } else {
3117     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
3118   }
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 #undef __FUNCT__
3123 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3124 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3125 {
3126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3127   PetscErrorCode ierr;
3128   PetscInt       i,*idxb = 0;
3129   PetscScalar    *va,*vb;
3130   Vec            vtmp;
3131 
3132   PetscFunctionBegin;
3133   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3134   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3135   if (idx) {
3136     for (i=0; i<A->rmap->n; i++) {
3137       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3138     }
3139   }
3140 
3141   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3142   if (idx) {
3143     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3144   }
3145   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3146   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3147 
3148   for (i=0; i<A->rmap->n; i++) {
3149     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3150       va[i] = vb[i];
3151       if (idx) idx[i] = a->garray[idxb[i]];
3152     }
3153   }
3154 
3155   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3156   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3157   ierr = PetscFree(idxb);CHKERRQ(ierr);
3158   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3164 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3165 {
3166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3167   PetscErrorCode ierr;
3168   PetscInt       i,*idxb = 0;
3169   PetscScalar    *va,*vb;
3170   Vec            vtmp;
3171 
3172   PetscFunctionBegin;
3173   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3174   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3175   if (idx) {
3176     for (i=0; i<A->cmap->n; i++) {
3177       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3178     }
3179   }
3180 
3181   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3182   if (idx) {
3183     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3184   }
3185   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3186   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3187 
3188   for (i=0; i<A->rmap->n; i++) {
3189     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3190       va[i] = vb[i];
3191       if (idx) idx[i] = a->garray[idxb[i]];
3192     }
3193   }
3194 
3195   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3196   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3197   ierr = PetscFree(idxb);CHKERRQ(ierr);
3198   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 #undef __FUNCT__
3203 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3204 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3205 {
3206   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3207   PetscInt       n      = A->rmap->n;
3208   PetscInt       cstart = A->cmap->rstart;
3209   PetscInt       *cmap  = mat->garray;
3210   PetscInt       *diagIdx, *offdiagIdx;
3211   Vec            diagV, offdiagV;
3212   PetscScalar    *a, *diagA, *offdiagA;
3213   PetscInt       r;
3214   PetscErrorCode ierr;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3218   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3219   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3220   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3221   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3222   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3223   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3224   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3225   for (r = 0; r < n; ++r) {
3226     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3227       a[r]   = diagA[r];
3228       idx[r] = cstart + diagIdx[r];
3229     } else {
3230       a[r]   = offdiagA[r];
3231       idx[r] = cmap[offdiagIdx[r]];
3232     }
3233   }
3234   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3235   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3236   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3237   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3238   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3239   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3245 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3246 {
3247   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3248   PetscInt       n      = A->rmap->n;
3249   PetscInt       cstart = A->cmap->rstart;
3250   PetscInt       *cmap  = mat->garray;
3251   PetscInt       *diagIdx, *offdiagIdx;
3252   Vec            diagV, offdiagV;
3253   PetscScalar    *a, *diagA, *offdiagA;
3254   PetscInt       r;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3259   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3260   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3261   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3262   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3263   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3264   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3265   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3266   for (r = 0; r < n; ++r) {
3267     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3268       a[r]   = diagA[r];
3269       idx[r] = cstart + diagIdx[r];
3270     } else {
3271       a[r]   = offdiagA[r];
3272       idx[r] = cmap[offdiagIdx[r]];
3273     }
3274   }
3275   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3276   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3277   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3278   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3279   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3280   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3281   PetscFunctionReturn(0);
3282 }
3283 
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3286 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3287 {
3288   PetscErrorCode ierr;
3289   Mat            *dummy;
3290 
3291   PetscFunctionBegin;
3292   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3293   *newmat = *dummy;
3294   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3295   PetscFunctionReturn(0);
3296 }
3297 
3298 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3302 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3303 {
3304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3305   PetscErrorCode ierr;
3306 
3307   PetscFunctionBegin;
3308   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3309   PetscFunctionReturn(0);
3310 }
3311 
3312 #undef __FUNCT__
3313 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3314 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3315 {
3316   PetscErrorCode ierr;
3317   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3318 
3319   PetscFunctionBegin;
3320   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3321   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3322   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3323   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 /* -------------------------------------------------------------------*/
3328 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3329                                        MatGetRow_MPIAIJ,
3330                                        MatRestoreRow_MPIAIJ,
3331                                        MatMult_MPIAIJ,
3332                                 /* 4*/ MatMultAdd_MPIAIJ,
3333                                        MatMultTranspose_MPIAIJ,
3334                                        MatMultTransposeAdd_MPIAIJ,
3335 #if defined(PETSC_HAVE_PBGL)
3336                                        MatSolve_MPIAIJ,
3337 #else
3338                                        0,
3339 #endif
3340                                        0,
3341                                        0,
3342                                 /*10*/ 0,
3343                                        0,
3344                                        0,
3345                                        MatSOR_MPIAIJ,
3346                                        MatTranspose_MPIAIJ,
3347                                 /*15*/ MatGetInfo_MPIAIJ,
3348                                        MatEqual_MPIAIJ,
3349                                        MatGetDiagonal_MPIAIJ,
3350                                        MatDiagonalScale_MPIAIJ,
3351                                        MatNorm_MPIAIJ,
3352                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3353                                        MatAssemblyEnd_MPIAIJ,
3354                                        MatSetOption_MPIAIJ,
3355                                        MatZeroEntries_MPIAIJ,
3356                                 /*24*/ MatZeroRows_MPIAIJ,
3357                                        0,
3358 #if defined(PETSC_HAVE_PBGL)
3359                                        0,
3360 #else
3361                                        0,
3362 #endif
3363                                        0,
3364                                        0,
3365                                 /*29*/ MatSetUp_MPIAIJ,
3366 #if defined(PETSC_HAVE_PBGL)
3367                                        0,
3368 #else
3369                                        0,
3370 #endif
3371                                        0,
3372                                        0,
3373                                        0,
3374                                 /*34*/ MatDuplicate_MPIAIJ,
3375                                        0,
3376                                        0,
3377                                        0,
3378                                        0,
3379                                 /*39*/ MatAXPY_MPIAIJ,
3380                                        MatGetSubMatrices_MPIAIJ,
3381                                        MatIncreaseOverlap_MPIAIJ,
3382                                        MatGetValues_MPIAIJ,
3383                                        MatCopy_MPIAIJ,
3384                                 /*44*/ MatGetRowMax_MPIAIJ,
3385                                        MatScale_MPIAIJ,
3386                                        0,
3387                                        0,
3388                                        MatZeroRowsColumns_MPIAIJ,
3389                                 /*49*/ MatSetRandom_MPIAIJ,
3390                                        0,
3391                                        0,
3392                                        0,
3393                                        0,
3394                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3395                                        0,
3396                                        MatSetUnfactored_MPIAIJ,
3397                                        MatPermute_MPIAIJ,
3398                                        0,
3399                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3400                                        MatDestroy_MPIAIJ,
3401                                        MatView_MPIAIJ,
3402                                        0,
3403                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3404                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3405                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3406                                        0,
3407                                        0,
3408                                        0,
3409                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3410                                        MatGetRowMinAbs_MPIAIJ,
3411                                        0,
3412                                        MatSetColoring_MPIAIJ,
3413                                        0,
3414                                        MatSetValuesAdifor_MPIAIJ,
3415                                 /*75*/ MatFDColoringApply_AIJ,
3416                                        0,
3417                                        0,
3418                                        0,
3419                                        MatFindZeroDiagonals_MPIAIJ,
3420                                 /*80*/ 0,
3421                                        0,
3422                                        0,
3423                                 /*83*/ MatLoad_MPIAIJ,
3424                                        0,
3425                                        0,
3426                                        0,
3427                                        0,
3428                                        0,
3429                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3430                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3431                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3432                                        MatPtAP_MPIAIJ_MPIAIJ,
3433                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3434                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3435                                        0,
3436                                        0,
3437                                        0,
3438                                        0,
3439                                 /*99*/ 0,
3440                                        0,
3441                                        0,
3442                                        MatConjugate_MPIAIJ,
3443                                        0,
3444                                 /*104*/MatSetValuesRow_MPIAIJ,
3445                                        MatRealPart_MPIAIJ,
3446                                        MatImaginaryPart_MPIAIJ,
3447                                        0,
3448                                        0,
3449                                 /*109*/0,
3450                                        MatGetRedundantMatrix_MPIAIJ,
3451                                        MatGetRowMin_MPIAIJ,
3452                                        0,
3453                                        0,
3454                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3455                                        0,
3456                                        0,
3457                                        0,
3458                                        0,
3459                                 /*119*/0,
3460                                        0,
3461                                        0,
3462                                        0,
3463                                        MatGetMultiProcBlock_MPIAIJ,
3464                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3465                                        MatGetColumnNorms_MPIAIJ,
3466                                        MatInvertBlockDiagonal_MPIAIJ,
3467                                        0,
3468                                        MatGetSubMatricesParallel_MPIAIJ,
3469                                 /*129*/0,
3470                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3471                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3472                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3473                                        0,
3474                                 /*134*/0,
3475                                        0,
3476                                        0,
3477                                        0,
3478                                        0,
3479                                 /*139*/0,
3480                                        0
3481 };
3482 
3483 /* ----------------------------------------------------------------------------------------*/
3484 
3485 #undef __FUNCT__
3486 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3487 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3488 {
3489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3490   PetscErrorCode ierr;
3491 
3492   PetscFunctionBegin;
3493   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3494   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3500 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3501 {
3502   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3503   PetscErrorCode ierr;
3504 
3505   PetscFunctionBegin;
3506   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3507   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3513 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3514 {
3515   Mat_MPIAIJ     *b;
3516   PetscErrorCode ierr;
3517 
3518   PetscFunctionBegin;
3519   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3520   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3521   b = (Mat_MPIAIJ*)B->data;
3522 
3523   if (!B->preallocated) {
3524     /* Explicitly create 2 MATSEQAIJ matrices. */
3525     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3526     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3527     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3528     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3529     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3530     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3531     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3532     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3533     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3534     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3535   }
3536 
3537   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3538   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3539   B->preallocated = PETSC_TRUE;
3540   PetscFunctionReturn(0);
3541 }
3542 
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3545 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3546 {
3547   Mat            mat;
3548   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3549   PetscErrorCode ierr;
3550 
3551   PetscFunctionBegin;
3552   *newmat = 0;
3553   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3554   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3555   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3556   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3557   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3558   a       = (Mat_MPIAIJ*)mat->data;
3559 
3560   mat->factortype   = matin->factortype;
3561   mat->rmap->bs     = matin->rmap->bs;
3562   mat->cmap->bs     = matin->cmap->bs;
3563   mat->assembled    = PETSC_TRUE;
3564   mat->insertmode   = NOT_SET_VALUES;
3565   mat->preallocated = PETSC_TRUE;
3566 
3567   a->size         = oldmat->size;
3568   a->rank         = oldmat->rank;
3569   a->donotstash   = oldmat->donotstash;
3570   a->roworiented  = oldmat->roworiented;
3571   a->rowindices   = 0;
3572   a->rowvalues    = 0;
3573   a->getrowactive = PETSC_FALSE;
3574 
3575   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3576   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3577 
3578   if (oldmat->colmap) {
3579 #if defined(PETSC_USE_CTABLE)
3580     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3581 #else
3582     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3583     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3584     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3585 #endif
3586   } else a->colmap = 0;
3587   if (oldmat->garray) {
3588     PetscInt len;
3589     len  = oldmat->B->cmap->n;
3590     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3591     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3592     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3593   } else a->garray = 0;
3594 
3595   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3596   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3597   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3598   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3599   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3600   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3601   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3602   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3603   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3604   *newmat = mat;
3605   PetscFunctionReturn(0);
3606 }
3607 
3608 
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatLoad_MPIAIJ"
3612 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3613 {
3614   PetscScalar    *vals,*svals;
3615   MPI_Comm       comm;
3616   PetscErrorCode ierr;
3617   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3618   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3619   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3620   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3621   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3622   int            fd;
3623   PetscInt       bs = 1;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3628   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3629   if (!rank) {
3630     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3631     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3632     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3633   }
3634 
3635   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3636   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3637   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3638 
3639   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3640 
3641   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3642   M    = header[1]; N = header[2];
3643   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3644   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3645   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3646 
3647   /* If global sizes are set, check if they are consistent with that given in the file */
3648   if (sizesset) {
3649     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3650   }
3651   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3652   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3653 
3654   /* determine ownership of all (block) rows */
3655   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3656   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3657   else m = newMat->rmap->n; /* Set by user */
3658 
3659   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3660   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3661 
3662   /* First process needs enough room for process with most rows */
3663   if (!rank) {
3664     mmax = rowners[1];
3665     for (i=2; i<=size; i++) {
3666       mmax = PetscMax(mmax, rowners[i]);
3667     }
3668   } else mmax = -1;             /* unused, but compilers complain */
3669 
3670   rowners[0] = 0;
3671   for (i=2; i<=size; i++) {
3672     rowners[i] += rowners[i-1];
3673   }
3674   rstart = rowners[rank];
3675   rend   = rowners[rank+1];
3676 
3677   /* distribute row lengths to all processors */
3678   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3679   if (!rank) {
3680     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3681     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3682     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3683     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3684     for (j=0; j<m; j++) {
3685       procsnz[0] += ourlens[j];
3686     }
3687     for (i=1; i<size; i++) {
3688       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3689       /* calculate the number of nonzeros on each processor */
3690       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3691         procsnz[i] += rowlengths[j];
3692       }
3693       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3694     }
3695     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3696   } else {
3697     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3698   }
3699 
3700   if (!rank) {
3701     /* determine max buffer needed and allocate it */
3702     maxnz = 0;
3703     for (i=0; i<size; i++) {
3704       maxnz = PetscMax(maxnz,procsnz[i]);
3705     }
3706     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3707 
3708     /* read in my part of the matrix column indices  */
3709     nz   = procsnz[0];
3710     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3711     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3712 
3713     /* read in every one elses and ship off */
3714     for (i=1; i<size; i++) {
3715       nz   = procsnz[i];
3716       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3717       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3718     }
3719     ierr = PetscFree(cols);CHKERRQ(ierr);
3720   } else {
3721     /* determine buffer space needed for message */
3722     nz = 0;
3723     for (i=0; i<m; i++) {
3724       nz += ourlens[i];
3725     }
3726     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3727 
3728     /* receive message of column indices*/
3729     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3730   }
3731 
3732   /* determine column ownership if matrix is not square */
3733   if (N != M) {
3734     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3735     else n = newMat->cmap->n;
3736     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3737     cstart = cend - n;
3738   } else {
3739     cstart = rstart;
3740     cend   = rend;
3741     n      = cend - cstart;
3742   }
3743 
3744   /* loop over local rows, determining number of off diagonal entries */
3745   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3746   jj   = 0;
3747   for (i=0; i<m; i++) {
3748     for (j=0; j<ourlens[i]; j++) {
3749       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3750       jj++;
3751     }
3752   }
3753 
3754   for (i=0; i<m; i++) {
3755     ourlens[i] -= offlens[i];
3756   }
3757   if (!sizesset) {
3758     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3759   }
3760 
3761   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3762 
3763   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3764 
3765   for (i=0; i<m; i++) {
3766     ourlens[i] += offlens[i];
3767   }
3768 
3769   if (!rank) {
3770     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3771 
3772     /* read in my part of the matrix numerical values  */
3773     nz   = procsnz[0];
3774     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3775 
3776     /* insert into matrix */
3777     jj      = rstart;
3778     smycols = mycols;
3779     svals   = vals;
3780     for (i=0; i<m; i++) {
3781       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3782       smycols += ourlens[i];
3783       svals   += ourlens[i];
3784       jj++;
3785     }
3786 
3787     /* read in other processors and ship out */
3788     for (i=1; i<size; i++) {
3789       nz   = procsnz[i];
3790       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3791       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3792     }
3793     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3794   } else {
3795     /* receive numeric values */
3796     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3797 
3798     /* receive message of values*/
3799     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3800 
3801     /* insert into matrix */
3802     jj      = rstart;
3803     smycols = mycols;
3804     svals   = vals;
3805     for (i=0; i<m; i++) {
3806       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3807       smycols += ourlens[i];
3808       svals   += ourlens[i];
3809       jj++;
3810     }
3811   }
3812   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3813   ierr = PetscFree(vals);CHKERRQ(ierr);
3814   ierr = PetscFree(mycols);CHKERRQ(ierr);
3815   ierr = PetscFree(rowners);CHKERRQ(ierr);
3816   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3817   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3818   PetscFunctionReturn(0);
3819 }
3820 
3821 #undef __FUNCT__
3822 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3823 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3824 {
3825   PetscErrorCode ierr;
3826   IS             iscol_local;
3827   PetscInt       csize;
3828 
3829   PetscFunctionBegin;
3830   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3831   if (call == MAT_REUSE_MATRIX) {
3832     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3833     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3834   } else {
3835     PetscInt cbs;
3836     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3837     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3838     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3839   }
3840   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3841   if (call == MAT_INITIAL_MATRIX) {
3842     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3843     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3844   }
3845   PetscFunctionReturn(0);
3846 }
3847 
3848 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3851 /*
3852     Not great since it makes two copies of the submatrix, first an SeqAIJ
3853   in local and then by concatenating the local matrices the end result.
3854   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3855 
3856   Note: This requires a sequential iscol with all indices.
3857 */
3858 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3859 {
3860   PetscErrorCode ierr;
3861   PetscMPIInt    rank,size;
3862   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3863   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3864   PetscBool      allcolumns, colflag;
3865   Mat            M,Mreuse;
3866   MatScalar      *vwork,*aa;
3867   MPI_Comm       comm;
3868   Mat_SeqAIJ     *aij;
3869 
3870   PetscFunctionBegin;
3871   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3872   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3874 
3875   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3876   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3877   if (colflag && ncol == mat->cmap->N) {
3878     allcolumns = PETSC_TRUE;
3879   } else {
3880     allcolumns = PETSC_FALSE;
3881   }
3882   if (call ==  MAT_REUSE_MATRIX) {
3883     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3884     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3885     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3886   } else {
3887     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3888   }
3889 
3890   /*
3891       m - number of local rows
3892       n - number of columns (same on all processors)
3893       rstart - first row in new global matrix generated
3894   */
3895   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3896   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3897   if (call == MAT_INITIAL_MATRIX) {
3898     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3899     ii  = aij->i;
3900     jj  = aij->j;
3901 
3902     /*
3903         Determine the number of non-zeros in the diagonal and off-diagonal
3904         portions of the matrix in order to do correct preallocation
3905     */
3906 
3907     /* first get start and end of "diagonal" columns */
3908     if (csize == PETSC_DECIDE) {
3909       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3910       if (mglobal == n) { /* square matrix */
3911         nlocal = m;
3912       } else {
3913         nlocal = n/size + ((n % size) > rank);
3914       }
3915     } else {
3916       nlocal = csize;
3917     }
3918     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3919     rstart = rend - nlocal;
3920     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3921 
3922     /* next, compute all the lengths */
3923     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3924     olens = dlens + m;
3925     for (i=0; i<m; i++) {
3926       jend = ii[i+1] - ii[i];
3927       olen = 0;
3928       dlen = 0;
3929       for (j=0; j<jend; j++) {
3930         if (*jj < rstart || *jj >= rend) olen++;
3931         else dlen++;
3932         jj++;
3933       }
3934       olens[i] = olen;
3935       dlens[i] = dlen;
3936     }
3937     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3938     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3939     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3940     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3941     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3942     ierr = PetscFree(dlens);CHKERRQ(ierr);
3943   } else {
3944     PetscInt ml,nl;
3945 
3946     M    = *newmat;
3947     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3948     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3949     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3950     /*
3951          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3952        rather than the slower MatSetValues().
3953     */
3954     M->was_assembled = PETSC_TRUE;
3955     M->assembled     = PETSC_FALSE;
3956   }
3957   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3958   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3959   ii   = aij->i;
3960   jj   = aij->j;
3961   aa   = aij->a;
3962   for (i=0; i<m; i++) {
3963     row   = rstart + i;
3964     nz    = ii[i+1] - ii[i];
3965     cwork = jj;     jj += nz;
3966     vwork = aa;     aa += nz;
3967     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3968   }
3969 
3970   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3971   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3972   *newmat = M;
3973 
3974   /* save submatrix used in processor for next request */
3975   if (call ==  MAT_INITIAL_MATRIX) {
3976     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3977     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3978   }
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3984 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3985 {
3986   PetscInt       m,cstart, cend,j,nnz,i,d;
3987   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3988   const PetscInt *JJ;
3989   PetscScalar    *values;
3990   PetscErrorCode ierr;
3991 
3992   PetscFunctionBegin;
3993   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3994 
3995   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3996   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3997   m      = B->rmap->n;
3998   cstart = B->cmap->rstart;
3999   cend   = B->cmap->rend;
4000   rstart = B->rmap->rstart;
4001 
4002   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
4003 
4004 #if defined(PETSC_USE_DEBUGGING)
4005   for (i=0; i<m; i++) {
4006     nnz = Ii[i+1]- Ii[i];
4007     JJ  = J + Ii[i];
4008     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4009     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
4010     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4011   }
4012 #endif
4013 
4014   for (i=0; i<m; i++) {
4015     nnz     = Ii[i+1]- Ii[i];
4016     JJ      = J + Ii[i];
4017     nnz_max = PetscMax(nnz_max,nnz);
4018     d       = 0;
4019     for (j=0; j<nnz; j++) {
4020       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4021     }
4022     d_nnz[i] = d;
4023     o_nnz[i] = nnz - d;
4024   }
4025   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4026   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4027 
4028   if (v) values = (PetscScalar*)v;
4029   else {
4030     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
4031     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
4032   }
4033 
4034   for (i=0; i<m; i++) {
4035     ii   = i + rstart;
4036     nnz  = Ii[i+1]- Ii[i];
4037     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
4038   }
4039   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4040   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4041 
4042   if (!v) {
4043     ierr = PetscFree(values);CHKERRQ(ierr);
4044   }
4045   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4046   PetscFunctionReturn(0);
4047 }
4048 
4049 #undef __FUNCT__
4050 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
4051 /*@
4052    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4053    (the default parallel PETSc format).
4054 
4055    Collective on MPI_Comm
4056 
4057    Input Parameters:
4058 +  B - the matrix
4059 .  i - the indices into j for the start of each local row (starts with zero)
4060 .  j - the column indices for each local row (starts with zero)
4061 -  v - optional values in the matrix
4062 
4063    Level: developer
4064 
4065    Notes:
4066        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4067      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4068      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4069 
4070        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4071 
4072        The format which is used for the sparse matrix input, is equivalent to a
4073     row-major ordering.. i.e for the following matrix, the input data expected is
4074     as shown:
4075 
4076         1 0 0
4077         2 0 3     P0
4078        -------
4079         4 5 6     P1
4080 
4081      Process0 [P0]: rows_owned=[0,1]
4082         i =  {0,1,3}  [size = nrow+1  = 2+1]
4083         j =  {0,0,2}  [size = nz = 6]
4084         v =  {1,2,3}  [size = nz = 6]
4085 
4086      Process1 [P1]: rows_owned=[2]
4087         i =  {0,3}    [size = nrow+1  = 1+1]
4088         j =  {0,1,2}  [size = nz = 6]
4089         v =  {4,5,6}  [size = nz = 6]
4090 
4091 .keywords: matrix, aij, compressed row, sparse, parallel
4092 
4093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4094           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4095 @*/
4096 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4097 {
4098   PetscErrorCode ierr;
4099 
4100   PetscFunctionBegin;
4101   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4102   PetscFunctionReturn(0);
4103 }
4104 
4105 #undef __FUNCT__
4106 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4107 /*@C
4108    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4109    (the default parallel PETSc format).  For good matrix assembly performance
4110    the user should preallocate the matrix storage by setting the parameters
4111    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4112    performance can be increased by more than a factor of 50.
4113 
4114    Collective on MPI_Comm
4115 
4116    Input Parameters:
4117 +  A - the matrix
4118 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4119            (same value is used for all local rows)
4120 .  d_nnz - array containing the number of nonzeros in the various rows of the
4121            DIAGONAL portion of the local submatrix (possibly different for each row)
4122            or NULL, if d_nz is used to specify the nonzero structure.
4123            The size of this array is equal to the number of local rows, i.e 'm'.
4124            For matrices that will be factored, you must leave room for (and set)
4125            the diagonal entry even if it is zero.
4126 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4127            submatrix (same value is used for all local rows).
4128 -  o_nnz - array containing the number of nonzeros in the various rows of the
4129            OFF-DIAGONAL portion of the local submatrix (possibly different for
4130            each row) or NULL, if o_nz is used to specify the nonzero
4131            structure. The size of this array is equal to the number
4132            of local rows, i.e 'm'.
4133 
4134    If the *_nnz parameter is given then the *_nz parameter is ignored
4135 
4136    The AIJ format (also called the Yale sparse matrix format or
4137    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4138    storage.  The stored row and column indices begin with zero.
4139    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4140 
4141    The parallel matrix is partitioned such that the first m0 rows belong to
4142    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4143    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4144 
4145    The DIAGONAL portion of the local submatrix of a processor can be defined
4146    as the submatrix which is obtained by extraction the part corresponding to
4147    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4148    first row that belongs to the processor, r2 is the last row belonging to
4149    the this processor, and c1-c2 is range of indices of the local part of a
4150    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4151    common case of a square matrix, the row and column ranges are the same and
4152    the DIAGONAL part is also square. The remaining portion of the local
4153    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4154 
4155    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4156 
4157    You can call MatGetInfo() to get information on how effective the preallocation was;
4158    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4159    You can also run with the option -info and look for messages with the string
4160    malloc in them to see if additional memory allocation was needed.
4161 
4162    Example usage:
4163 
4164    Consider the following 8x8 matrix with 34 non-zero values, that is
4165    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4166    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4167    as follows:
4168 
4169 .vb
4170             1  2  0  |  0  3  0  |  0  4
4171     Proc0   0  5  6  |  7  0  0  |  8  0
4172             9  0 10  | 11  0  0  | 12  0
4173     -------------------------------------
4174            13  0 14  | 15 16 17  |  0  0
4175     Proc1   0 18  0  | 19 20 21  |  0  0
4176             0  0  0  | 22 23  0  | 24  0
4177     -------------------------------------
4178     Proc2  25 26 27  |  0  0 28  | 29  0
4179            30  0  0  | 31 32 33  |  0 34
4180 .ve
4181 
4182    This can be represented as a collection of submatrices as:
4183 
4184 .vb
4185       A B C
4186       D E F
4187       G H I
4188 .ve
4189 
4190    Where the submatrices A,B,C are owned by proc0, D,E,F are
4191    owned by proc1, G,H,I are owned by proc2.
4192 
4193    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4194    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4195    The 'M','N' parameters are 8,8, and have the same values on all procs.
4196 
4197    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4198    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4199    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4200    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4201    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4202    matrix, ans [DF] as another SeqAIJ matrix.
4203 
4204    When d_nz, o_nz parameters are specified, d_nz storage elements are
4205    allocated for every row of the local diagonal submatrix, and o_nz
4206    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4207    One way to choose d_nz and o_nz is to use the max nonzerors per local
4208    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4209    In this case, the values of d_nz,o_nz are:
4210 .vb
4211      proc0 : dnz = 2, o_nz = 2
4212      proc1 : dnz = 3, o_nz = 2
4213      proc2 : dnz = 1, o_nz = 4
4214 .ve
4215    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4216    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4217    for proc3. i.e we are using 12+15+10=37 storage locations to store
4218    34 values.
4219 
4220    When d_nnz, o_nnz parameters are specified, the storage is specified
4221    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4222    In the above case the values for d_nnz,o_nnz are:
4223 .vb
4224      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4225      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4226      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4227 .ve
4228    Here the space allocated is sum of all the above values i.e 34, and
4229    hence pre-allocation is perfect.
4230 
4231    Level: intermediate
4232 
4233 .keywords: matrix, aij, compressed row, sparse, parallel
4234 
4235 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4236           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4237 @*/
4238 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4239 {
4240   PetscErrorCode ierr;
4241 
4242   PetscFunctionBegin;
4243   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4244   PetscValidType(B,1);
4245   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4246   PetscFunctionReturn(0);
4247 }
4248 
4249 #undef __FUNCT__
4250 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4251 /*@
4252      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4253          CSR format the local rows.
4254 
4255    Collective on MPI_Comm
4256 
4257    Input Parameters:
4258 +  comm - MPI communicator
4259 .  m - number of local rows (Cannot be PETSC_DECIDE)
4260 .  n - This value should be the same as the local size used in creating the
4261        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4262        calculated if N is given) For square matrices n is almost always m.
4263 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4264 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4265 .   i - row indices
4266 .   j - column indices
4267 -   a - matrix values
4268 
4269    Output Parameter:
4270 .   mat - the matrix
4271 
4272    Level: intermediate
4273 
4274    Notes:
4275        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4276      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4277      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4278 
4279        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4280 
4281        The format which is used for the sparse matrix input, is equivalent to a
4282     row-major ordering.. i.e for the following matrix, the input data expected is
4283     as shown:
4284 
4285         1 0 0
4286         2 0 3     P0
4287        -------
4288         4 5 6     P1
4289 
4290      Process0 [P0]: rows_owned=[0,1]
4291         i =  {0,1,3}  [size = nrow+1  = 2+1]
4292         j =  {0,0,2}  [size = nz = 6]
4293         v =  {1,2,3}  [size = nz = 6]
4294 
4295      Process1 [P1]: rows_owned=[2]
4296         i =  {0,3}    [size = nrow+1  = 1+1]
4297         j =  {0,1,2}  [size = nz = 6]
4298         v =  {4,5,6}  [size = nz = 6]
4299 
4300 .keywords: matrix, aij, compressed row, sparse, parallel
4301 
4302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4303           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4304 @*/
4305 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4306 {
4307   PetscErrorCode ierr;
4308 
4309   PetscFunctionBegin;
4310   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4311   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4312   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4313   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4314   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4315   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4316   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatCreateAIJ"
4322 /*@C
4323    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4324    (the default parallel PETSc format).  For good matrix assembly performance
4325    the user should preallocate the matrix storage by setting the parameters
4326    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4327    performance can be increased by more than a factor of 50.
4328 
4329    Collective on MPI_Comm
4330 
4331    Input Parameters:
4332 +  comm - MPI communicator
4333 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4334            This value should be the same as the local size used in creating the
4335            y vector for the matrix-vector product y = Ax.
4336 .  n - This value should be the same as the local size used in creating the
4337        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4338        calculated if N is given) For square matrices n is almost always m.
4339 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4340 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4341 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4342            (same value is used for all local rows)
4343 .  d_nnz - array containing the number of nonzeros in the various rows of the
4344            DIAGONAL portion of the local submatrix (possibly different for each row)
4345            or NULL, if d_nz is used to specify the nonzero structure.
4346            The size of this array is equal to the number of local rows, i.e 'm'.
4347 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4348            submatrix (same value is used for all local rows).
4349 -  o_nnz - array containing the number of nonzeros in the various rows of the
4350            OFF-DIAGONAL portion of the local submatrix (possibly different for
4351            each row) or NULL, if o_nz is used to specify the nonzero
4352            structure. The size of this array is equal to the number
4353            of local rows, i.e 'm'.
4354 
4355    Output Parameter:
4356 .  A - the matrix
4357 
4358    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4359    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4360    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4361 
4362    Notes:
4363    If the *_nnz parameter is given then the *_nz parameter is ignored
4364 
4365    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4366    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4367    storage requirements for this matrix.
4368 
4369    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4370    processor than it must be used on all processors that share the object for
4371    that argument.
4372 
4373    The user MUST specify either the local or global matrix dimensions
4374    (possibly both).
4375 
4376    The parallel matrix is partitioned across processors such that the
4377    first m0 rows belong to process 0, the next m1 rows belong to
4378    process 1, the next m2 rows belong to process 2 etc.. where
4379    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4380    values corresponding to [m x N] submatrix.
4381 
4382    The columns are logically partitioned with the n0 columns belonging
4383    to 0th partition, the next n1 columns belonging to the next
4384    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4385 
4386    The DIAGONAL portion of the local submatrix on any given processor
4387    is the submatrix corresponding to the rows and columns m,n
4388    corresponding to the given processor. i.e diagonal matrix on
4389    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4390    etc. The remaining portion of the local submatrix [m x (N-n)]
4391    constitute the OFF-DIAGONAL portion. The example below better
4392    illustrates this concept.
4393 
4394    For a square global matrix we define each processor's diagonal portion
4395    to be its local rows and the corresponding columns (a square submatrix);
4396    each processor's off-diagonal portion encompasses the remainder of the
4397    local matrix (a rectangular submatrix).
4398 
4399    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4400 
4401    When calling this routine with a single process communicator, a matrix of
4402    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4403    type of communicator, use the construction mechanism:
4404      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4405 
4406    By default, this format uses inodes (identical nodes) when possible.
4407    We search for consecutive rows with the same nonzero structure, thereby
4408    reusing matrix information to achieve increased efficiency.
4409 
4410    Options Database Keys:
4411 +  -mat_no_inode  - Do not use inodes
4412 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4413 -  -mat_aij_oneindex - Internally use indexing starting at 1
4414         rather than 0.  Note that when calling MatSetValues(),
4415         the user still MUST index entries starting at 0!
4416 
4417 
4418    Example usage:
4419 
4420    Consider the following 8x8 matrix with 34 non-zero values, that is
4421    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4422    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4423    as follows:
4424 
4425 .vb
4426             1  2  0  |  0  3  0  |  0  4
4427     Proc0   0  5  6  |  7  0  0  |  8  0
4428             9  0 10  | 11  0  0  | 12  0
4429     -------------------------------------
4430            13  0 14  | 15 16 17  |  0  0
4431     Proc1   0 18  0  | 19 20 21  |  0  0
4432             0  0  0  | 22 23  0  | 24  0
4433     -------------------------------------
4434     Proc2  25 26 27  |  0  0 28  | 29  0
4435            30  0  0  | 31 32 33  |  0 34
4436 .ve
4437 
4438    This can be represented as a collection of submatrices as:
4439 
4440 .vb
4441       A B C
4442       D E F
4443       G H I
4444 .ve
4445 
4446    Where the submatrices A,B,C are owned by proc0, D,E,F are
4447    owned by proc1, G,H,I are owned by proc2.
4448 
4449    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4450    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4451    The 'M','N' parameters are 8,8, and have the same values on all procs.
4452 
4453    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4454    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4455    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4456    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4457    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4458    matrix, ans [DF] as another SeqAIJ matrix.
4459 
4460    When d_nz, o_nz parameters are specified, d_nz storage elements are
4461    allocated for every row of the local diagonal submatrix, and o_nz
4462    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4463    One way to choose d_nz and o_nz is to use the max nonzerors per local
4464    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4465    In this case, the values of d_nz,o_nz are:
4466 .vb
4467      proc0 : dnz = 2, o_nz = 2
4468      proc1 : dnz = 3, o_nz = 2
4469      proc2 : dnz = 1, o_nz = 4
4470 .ve
4471    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4472    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4473    for proc3. i.e we are using 12+15+10=37 storage locations to store
4474    34 values.
4475 
4476    When d_nnz, o_nnz parameters are specified, the storage is specified
4477    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4478    In the above case the values for d_nnz,o_nnz are:
4479 .vb
4480      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4481      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4482      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4483 .ve
4484    Here the space allocated is sum of all the above values i.e 34, and
4485    hence pre-allocation is perfect.
4486 
4487    Level: intermediate
4488 
4489 .keywords: matrix, aij, compressed row, sparse, parallel
4490 
4491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4492           MPIAIJ, MatCreateMPIAIJWithArrays()
4493 @*/
4494 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4495 {
4496   PetscErrorCode ierr;
4497   PetscMPIInt    size;
4498 
4499   PetscFunctionBegin;
4500   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4501   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4503   if (size > 1) {
4504     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4505     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4506   } else {
4507     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4508     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4509   }
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4515 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4516 {
4517   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4518 
4519   PetscFunctionBegin;
4520   *Ad     = a->A;
4521   *Ao     = a->B;
4522   *colmap = a->garray;
4523   PetscFunctionReturn(0);
4524 }
4525 
4526 #undef __FUNCT__
4527 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4528 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4529 {
4530   PetscErrorCode ierr;
4531   PetscInt       i;
4532   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4533 
4534   PetscFunctionBegin;
4535   if (coloring->ctype == IS_COLORING_GLOBAL) {
4536     ISColoringValue *allcolors,*colors;
4537     ISColoring      ocoloring;
4538 
4539     /* set coloring for diagonal portion */
4540     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4541 
4542     /* set coloring for off-diagonal portion */
4543     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4544     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4545     for (i=0; i<a->B->cmap->n; i++) {
4546       colors[i] = allcolors[a->garray[i]];
4547     }
4548     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4549     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4550     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4551     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4552   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4553     ISColoringValue *colors;
4554     PetscInt        *larray;
4555     ISColoring      ocoloring;
4556 
4557     /* set coloring for diagonal portion */
4558     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4559     for (i=0; i<a->A->cmap->n; i++) {
4560       larray[i] = i + A->cmap->rstart;
4561     }
4562     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4563     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4564     for (i=0; i<a->A->cmap->n; i++) {
4565       colors[i] = coloring->colors[larray[i]];
4566     }
4567     ierr = PetscFree(larray);CHKERRQ(ierr);
4568     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4569     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4570     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4571 
4572     /* set coloring for off-diagonal portion */
4573     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4574     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4575     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4576     for (i=0; i<a->B->cmap->n; i++) {
4577       colors[i] = coloring->colors[larray[i]];
4578     }
4579     ierr = PetscFree(larray);CHKERRQ(ierr);
4580     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4581     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4582     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4583   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #undef __FUNCT__
4588 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4589 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4590 {
4591   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4592   PetscErrorCode ierr;
4593 
4594   PetscFunctionBegin;
4595   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4596   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4597   PetscFunctionReturn(0);
4598 }
4599 
4600 #undef __FUNCT__
4601 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4602 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4603 {
4604   PetscErrorCode ierr;
4605   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4606   PetscInt       *indx;
4607 
4608   PetscFunctionBegin;
4609   /* This routine will ONLY return MPIAIJ type matrix */
4610   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4611   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4612   if (n == PETSC_DECIDE) {
4613     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4614   }
4615   /* Check sum(n) = N */
4616   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4617   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4618 
4619   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4620   rstart -= m;
4621 
4622   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4623   for (i=0; i<m; i++) {
4624     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4625     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4626     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4627   }
4628 
4629   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4630   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4631   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4632   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4633   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4634   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4635   PetscFunctionReturn(0);
4636 }
4637 
4638 #undef __FUNCT__
4639 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4640 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4641 {
4642   PetscErrorCode ierr;
4643   PetscInt       m,N,i,rstart,nnz,Ii;
4644   PetscInt       *indx;
4645   PetscScalar    *values;
4646 
4647   PetscFunctionBegin;
4648   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4649   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4650   for (i=0; i<m; i++) {
4651     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4652     Ii   = i + rstart;
4653     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4654     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4655   }
4656   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4657   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4658   PetscFunctionReturn(0);
4659 }
4660 
4661 #undef __FUNCT__
4662 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4663 /*@
4664       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4665                  matrices from each processor
4666 
4667     Collective on MPI_Comm
4668 
4669    Input Parameters:
4670 +    comm - the communicators the parallel matrix will live on
4671 .    inmat - the input sequential matrices
4672 .    n - number of local columns (or PETSC_DECIDE)
4673 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4674 
4675    Output Parameter:
4676 .    outmat - the parallel matrix generated
4677 
4678     Level: advanced
4679 
4680    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4681 
4682 @*/
4683 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4684 {
4685   PetscErrorCode ierr;
4686 
4687   PetscFunctionBegin;
4688   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4689   if (scall == MAT_INITIAL_MATRIX) {
4690     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4691   }
4692   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4693   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4694   PetscFunctionReturn(0);
4695 }
4696 
4697 #undef __FUNCT__
4698 #define __FUNCT__ "MatFileSplit"
4699 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4700 {
4701   PetscErrorCode    ierr;
4702   PetscMPIInt       rank;
4703   PetscInt          m,N,i,rstart,nnz;
4704   size_t            len;
4705   const PetscInt    *indx;
4706   PetscViewer       out;
4707   char              *name;
4708   Mat               B;
4709   const PetscScalar *values;
4710 
4711   PetscFunctionBegin;
4712   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4713   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4714   /* Should this be the type of the diagonal block of A? */
4715   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4716   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4717   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4718   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4719   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4720   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4721   for (i=0; i<m; i++) {
4722     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4723     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4724     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4725   }
4726   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4727   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4728 
4729   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4730   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4731   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4732   sprintf(name,"%s.%d",outfile,rank);
4733   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4734   ierr = PetscFree(name);CHKERRQ(ierr);
4735   ierr = MatView(B,out);CHKERRQ(ierr);
4736   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4737   ierr = MatDestroy(&B);CHKERRQ(ierr);
4738   PetscFunctionReturn(0);
4739 }
4740 
4741 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4742 #undef __FUNCT__
4743 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4744 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4745 {
4746   PetscErrorCode      ierr;
4747   Mat_Merge_SeqsToMPI *merge;
4748   PetscContainer      container;
4749 
4750   PetscFunctionBegin;
4751   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4752   if (container) {
4753     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4754     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4755     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4756     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4757     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4758     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4759     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4760     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4761     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4762     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4763     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4764     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4765     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4766     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4767     ierr = PetscFree(merge);CHKERRQ(ierr);
4768     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4769   }
4770   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4771   PetscFunctionReturn(0);
4772 }
4773 
4774 #include <../src/mat/utils/freespace.h>
4775 #include <petscbt.h>
4776 
4777 #undef __FUNCT__
4778 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4779 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4780 {
4781   PetscErrorCode      ierr;
4782   MPI_Comm            comm;
4783   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4784   PetscMPIInt         size,rank,taga,*len_s;
4785   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4786   PetscInt            proc,m;
4787   PetscInt            **buf_ri,**buf_rj;
4788   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4789   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4790   MPI_Request         *s_waits,*r_waits;
4791   MPI_Status          *status;
4792   MatScalar           *aa=a->a;
4793   MatScalar           **abuf_r,*ba_i;
4794   Mat_Merge_SeqsToMPI *merge;
4795   PetscContainer      container;
4796 
4797   PetscFunctionBegin;
4798   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4799   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4800 
4801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4802   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4803 
4804   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4805   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4806 
4807   bi     = merge->bi;
4808   bj     = merge->bj;
4809   buf_ri = merge->buf_ri;
4810   buf_rj = merge->buf_rj;
4811 
4812   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4813   owners = merge->rowmap->range;
4814   len_s  = merge->len_s;
4815 
4816   /* send and recv matrix values */
4817   /*-----------------------------*/
4818   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4819   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4820 
4821   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4822   for (proc=0,k=0; proc<size; proc++) {
4823     if (!len_s[proc]) continue;
4824     i    = owners[proc];
4825     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4826     k++;
4827   }
4828 
4829   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4830   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4831   ierr = PetscFree(status);CHKERRQ(ierr);
4832 
4833   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4834   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4835 
4836   /* insert mat values of mpimat */
4837   /*----------------------------*/
4838   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4839   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4840 
4841   for (k=0; k<merge->nrecv; k++) {
4842     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4843     nrows       = *(buf_ri_k[k]);
4844     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4845     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4846   }
4847 
4848   /* set values of ba */
4849   m = merge->rowmap->n;
4850   for (i=0; i<m; i++) {
4851     arow = owners[rank] + i;
4852     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4853     bnzi = bi[i+1] - bi[i];
4854     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4855 
4856     /* add local non-zero vals of this proc's seqmat into ba */
4857     anzi   = ai[arow+1] - ai[arow];
4858     aj     = a->j + ai[arow];
4859     aa     = a->a + ai[arow];
4860     nextaj = 0;
4861     for (j=0; nextaj<anzi; j++) {
4862       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4863         ba_i[j] += aa[nextaj++];
4864       }
4865     }
4866 
4867     /* add received vals into ba */
4868     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4869       /* i-th row */
4870       if (i == *nextrow[k]) {
4871         anzi   = *(nextai[k]+1) - *nextai[k];
4872         aj     = buf_rj[k] + *(nextai[k]);
4873         aa     = abuf_r[k] + *(nextai[k]);
4874         nextaj = 0;
4875         for (j=0; nextaj<anzi; j++) {
4876           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4877             ba_i[j] += aa[nextaj++];
4878           }
4879         }
4880         nextrow[k]++; nextai[k]++;
4881       }
4882     }
4883     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4884   }
4885   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4886   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4887 
4888   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4889   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4890   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4891   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4892   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4893   PetscFunctionReturn(0);
4894 }
4895 
4896 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4897 
4898 #undef __FUNCT__
4899 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4900 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4901 {
4902   PetscErrorCode      ierr;
4903   Mat                 B_mpi;
4904   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4905   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4906   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4907   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4908   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4909   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4910   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4911   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4912   MPI_Status          *status;
4913   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4914   PetscBT             lnkbt;
4915   Mat_Merge_SeqsToMPI *merge;
4916   PetscContainer      container;
4917 
4918   PetscFunctionBegin;
4919   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4920 
4921   /* make sure it is a PETSc comm */
4922   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4924   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4925 
4926   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4927   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4928 
4929   /* determine row ownership */
4930   /*---------------------------------------------------------*/
4931   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4932   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4933   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4934   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4935   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4936   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4937   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4938 
4939   m      = merge->rowmap->n;
4940   owners = merge->rowmap->range;
4941 
4942   /* determine the number of messages to send, their lengths */
4943   /*---------------------------------------------------------*/
4944   len_s = merge->len_s;
4945 
4946   len          = 0; /* length of buf_si[] */
4947   merge->nsend = 0;
4948   for (proc=0; proc<size; proc++) {
4949     len_si[proc] = 0;
4950     if (proc == rank) {
4951       len_s[proc] = 0;
4952     } else {
4953       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4954       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4955     }
4956     if (len_s[proc]) {
4957       merge->nsend++;
4958       nrows = 0;
4959       for (i=owners[proc]; i<owners[proc+1]; i++) {
4960         if (ai[i+1] > ai[i]) nrows++;
4961       }
4962       len_si[proc] = 2*(nrows+1);
4963       len         += len_si[proc];
4964     }
4965   }
4966 
4967   /* determine the number and length of messages to receive for ij-structure */
4968   /*-------------------------------------------------------------------------*/
4969   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4970   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4971 
4972   /* post the Irecv of j-structure */
4973   /*-------------------------------*/
4974   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4975   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4976 
4977   /* post the Isend of j-structure */
4978   /*--------------------------------*/
4979   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4980 
4981   for (proc=0, k=0; proc<size; proc++) {
4982     if (!len_s[proc]) continue;
4983     i    = owners[proc];
4984     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4985     k++;
4986   }
4987 
4988   /* receives and sends of j-structure are complete */
4989   /*------------------------------------------------*/
4990   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4991   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4992 
4993   /* send and recv i-structure */
4994   /*---------------------------*/
4995   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4996   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4997 
4998   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4999   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
5000   for (proc=0,k=0; proc<size; proc++) {
5001     if (!len_s[proc]) continue;
5002     /* form outgoing message for i-structure:
5003          buf_si[0]:                 nrows to be sent
5004                [1:nrows]:           row index (global)
5005                [nrows+1:2*nrows+1]: i-structure index
5006     */
5007     /*-------------------------------------------*/
5008     nrows       = len_si[proc]/2 - 1;
5009     buf_si_i    = buf_si + nrows+1;
5010     buf_si[0]   = nrows;
5011     buf_si_i[0] = 0;
5012     nrows       = 0;
5013     for (i=owners[proc]; i<owners[proc+1]; i++) {
5014       anzi = ai[i+1] - ai[i];
5015       if (anzi) {
5016         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
5017         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
5018         nrows++;
5019       }
5020     }
5021     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
5022     k++;
5023     buf_si += len_si[proc];
5024   }
5025 
5026   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
5027   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
5028 
5029   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
5030   for (i=0; i<merge->nrecv; i++) {
5031     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
5032   }
5033 
5034   ierr = PetscFree(len_si);CHKERRQ(ierr);
5035   ierr = PetscFree(len_ri);CHKERRQ(ierr);
5036   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
5037   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
5038   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
5039   ierr = PetscFree(buf_s);CHKERRQ(ierr);
5040   ierr = PetscFree(status);CHKERRQ(ierr);
5041 
5042   /* compute a local seq matrix in each processor */
5043   /*----------------------------------------------*/
5044   /* allocate bi array and free space for accumulating nonzero column info */
5045   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
5046   bi[0] = 0;
5047 
5048   /* create and initialize a linked list */
5049   nlnk = N+1;
5050   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5051 
5052   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5053   len  = ai[owners[rank+1]] - ai[owners[rank]];
5054   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5055 
5056   current_space = free_space;
5057 
5058   /* determine symbolic info for each local row */
5059   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5060 
5061   for (k=0; k<merge->nrecv; k++) {
5062     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5063     nrows       = *buf_ri_k[k];
5064     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5065     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5066   }
5067 
5068   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5069   len  = 0;
5070   for (i=0; i<m; i++) {
5071     bnzi = 0;
5072     /* add local non-zero cols of this proc's seqmat into lnk */
5073     arow  = owners[rank] + i;
5074     anzi  = ai[arow+1] - ai[arow];
5075     aj    = a->j + ai[arow];
5076     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5077     bnzi += nlnk;
5078     /* add received col data into lnk */
5079     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5080       if (i == *nextrow[k]) { /* i-th row */
5081         anzi  = *(nextai[k]+1) - *nextai[k];
5082         aj    = buf_rj[k] + *nextai[k];
5083         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5084         bnzi += nlnk;
5085         nextrow[k]++; nextai[k]++;
5086       }
5087     }
5088     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5089 
5090     /* if free space is not available, make more free space */
5091     if (current_space->local_remaining<bnzi) {
5092       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5093       nspacedouble++;
5094     }
5095     /* copy data into free space, then initialize lnk */
5096     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5097     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5098 
5099     current_space->array           += bnzi;
5100     current_space->local_used      += bnzi;
5101     current_space->local_remaining -= bnzi;
5102 
5103     bi[i+1] = bi[i] + bnzi;
5104   }
5105 
5106   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5107 
5108   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5109   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5110   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5111 
5112   /* create symbolic parallel matrix B_mpi */
5113   /*---------------------------------------*/
5114   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5115   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5116   if (n==PETSC_DECIDE) {
5117     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5118   } else {
5119     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5120   }
5121   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5122   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5123   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5124   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5125   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5126 
5127   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5128   B_mpi->assembled    = PETSC_FALSE;
5129   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5130   merge->bi           = bi;
5131   merge->bj           = bj;
5132   merge->buf_ri       = buf_ri;
5133   merge->buf_rj       = buf_rj;
5134   merge->coi          = NULL;
5135   merge->coj          = NULL;
5136   merge->owners_co    = NULL;
5137 
5138   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5139 
5140   /* attach the supporting struct to B_mpi for reuse */
5141   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5142   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5143   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5144   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5145   *mpimat = B_mpi;
5146 
5147   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 #undef __FUNCT__
5152 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5153 /*@C
5154       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5155                  matrices from each processor
5156 
5157     Collective on MPI_Comm
5158 
5159    Input Parameters:
5160 +    comm - the communicators the parallel matrix will live on
5161 .    seqmat - the input sequential matrices
5162 .    m - number of local rows (or PETSC_DECIDE)
5163 .    n - number of local columns (or PETSC_DECIDE)
5164 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5165 
5166    Output Parameter:
5167 .    mpimat - the parallel matrix generated
5168 
5169     Level: advanced
5170 
5171    Notes:
5172      The dimensions of the sequential matrix in each processor MUST be the same.
5173      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5174      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5175 @*/
5176 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5177 {
5178   PetscErrorCode ierr;
5179   PetscMPIInt    size;
5180 
5181   PetscFunctionBegin;
5182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5183   if (size == 1) {
5184     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5185     if (scall == MAT_INITIAL_MATRIX) {
5186       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5187     } else {
5188       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5189     }
5190     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5191     PetscFunctionReturn(0);
5192   }
5193   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5194   if (scall == MAT_INITIAL_MATRIX) {
5195     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5196   }
5197   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5198   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5199   PetscFunctionReturn(0);
5200 }
5201 
5202 #undef __FUNCT__
5203 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5204 /*@
5205      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5206           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5207           with MatGetSize()
5208 
5209     Not Collective
5210 
5211    Input Parameters:
5212 +    A - the matrix
5213 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5214 
5215    Output Parameter:
5216 .    A_loc - the local sequential matrix generated
5217 
5218     Level: developer
5219 
5220 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5221 
5222 @*/
5223 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5224 {
5225   PetscErrorCode ierr;
5226   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5227   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5228   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5229   MatScalar      *aa=a->a,*ba=b->a,*cam;
5230   PetscScalar    *ca;
5231   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5232   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5233   PetscBool      match;
5234 
5235   PetscFunctionBegin;
5236   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5237   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5238   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5239   if (scall == MAT_INITIAL_MATRIX) {
5240     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5241     ci[0] = 0;
5242     for (i=0; i<am; i++) {
5243       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5244     }
5245     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5246     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5247     k    = 0;
5248     for (i=0; i<am; i++) {
5249       ncols_o = bi[i+1] - bi[i];
5250       ncols_d = ai[i+1] - ai[i];
5251       /* off-diagonal portion of A */
5252       for (jo=0; jo<ncols_o; jo++) {
5253         col = cmap[*bj];
5254         if (col >= cstart) break;
5255         cj[k]   = col; bj++;
5256         ca[k++] = *ba++;
5257       }
5258       /* diagonal portion of A */
5259       for (j=0; j<ncols_d; j++) {
5260         cj[k]   = cstart + *aj++;
5261         ca[k++] = *aa++;
5262       }
5263       /* off-diagonal portion of A */
5264       for (j=jo; j<ncols_o; j++) {
5265         cj[k]   = cmap[*bj++];
5266         ca[k++] = *ba++;
5267       }
5268     }
5269     /* put together the new matrix */
5270     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5271     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5272     /* Since these are PETSc arrays, change flags to free them as necessary. */
5273     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5274     mat->free_a  = PETSC_TRUE;
5275     mat->free_ij = PETSC_TRUE;
5276     mat->nonew   = 0;
5277   } else if (scall == MAT_REUSE_MATRIX) {
5278     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5279     ci = mat->i; cj = mat->j; cam = mat->a;
5280     for (i=0; i<am; i++) {
5281       /* off-diagonal portion of A */
5282       ncols_o = bi[i+1] - bi[i];
5283       for (jo=0; jo<ncols_o; jo++) {
5284         col = cmap[*bj];
5285         if (col >= cstart) break;
5286         *cam++ = *ba++; bj++;
5287       }
5288       /* diagonal portion of A */
5289       ncols_d = ai[i+1] - ai[i];
5290       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5291       /* off-diagonal portion of A */
5292       for (j=jo; j<ncols_o; j++) {
5293         *cam++ = *ba++; bj++;
5294       }
5295     }
5296   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5297   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5298   PetscFunctionReturn(0);
5299 }
5300 
5301 #undef __FUNCT__
5302 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5303 /*@C
5304      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5305 
5306     Not Collective
5307 
5308    Input Parameters:
5309 +    A - the matrix
5310 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5311 -    row, col - index sets of rows and columns to extract (or NULL)
5312 
5313    Output Parameter:
5314 .    A_loc - the local sequential matrix generated
5315 
5316     Level: developer
5317 
5318 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5319 
5320 @*/
5321 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5322 {
5323   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5324   PetscErrorCode ierr;
5325   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5326   IS             isrowa,iscola;
5327   Mat            *aloc;
5328   PetscBool      match;
5329 
5330   PetscFunctionBegin;
5331   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5332   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5333   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5334   if (!row) {
5335     start = A->rmap->rstart; end = A->rmap->rend;
5336     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5337   } else {
5338     isrowa = *row;
5339   }
5340   if (!col) {
5341     start = A->cmap->rstart;
5342     cmap  = a->garray;
5343     nzA   = a->A->cmap->n;
5344     nzB   = a->B->cmap->n;
5345     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5346     ncols = 0;
5347     for (i=0; i<nzB; i++) {
5348       if (cmap[i] < start) idx[ncols++] = cmap[i];
5349       else break;
5350     }
5351     imark = i;
5352     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5353     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5354     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5355   } else {
5356     iscola = *col;
5357   }
5358   if (scall != MAT_INITIAL_MATRIX) {
5359     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5360     aloc[0] = *A_loc;
5361   }
5362   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5363   *A_loc = aloc[0];
5364   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5365   if (!row) {
5366     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5367   }
5368   if (!col) {
5369     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5370   }
5371   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5372   PetscFunctionReturn(0);
5373 }
5374 
5375 #undef __FUNCT__
5376 #define __FUNCT__ "MatGetBrowsOfAcols"
5377 /*@C
5378     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5379 
5380     Collective on Mat
5381 
5382    Input Parameters:
5383 +    A,B - the matrices in mpiaij format
5384 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5385 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5386 
5387    Output Parameter:
5388 +    rowb, colb - index sets of rows and columns of B to extract
5389 -    B_seq - the sequential matrix generated
5390 
5391     Level: developer
5392 
5393 @*/
5394 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5395 {
5396   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5397   PetscErrorCode ierr;
5398   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5399   IS             isrowb,iscolb;
5400   Mat            *bseq=NULL;
5401 
5402   PetscFunctionBegin;
5403   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5404     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5405   }
5406   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5407 
5408   if (scall == MAT_INITIAL_MATRIX) {
5409     start = A->cmap->rstart;
5410     cmap  = a->garray;
5411     nzA   = a->A->cmap->n;
5412     nzB   = a->B->cmap->n;
5413     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5414     ncols = 0;
5415     for (i=0; i<nzB; i++) {  /* row < local row index */
5416       if (cmap[i] < start) idx[ncols++] = cmap[i];
5417       else break;
5418     }
5419     imark = i;
5420     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5421     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5422     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5423     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5424   } else {
5425     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5426     isrowb  = *rowb; iscolb = *colb;
5427     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5428     bseq[0] = *B_seq;
5429   }
5430   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5431   *B_seq = bseq[0];
5432   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5433   if (!rowb) {
5434     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5435   } else {
5436     *rowb = isrowb;
5437   }
5438   if (!colb) {
5439     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5440   } else {
5441     *colb = iscolb;
5442   }
5443   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5444   PetscFunctionReturn(0);
5445 }
5446 
5447 #undef __FUNCT__
5448 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5449 /*
5450     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5451     of the OFF-DIAGONAL portion of local A
5452 
5453     Collective on Mat
5454 
5455    Input Parameters:
5456 +    A,B - the matrices in mpiaij format
5457 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5458 
5459    Output Parameter:
5460 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5461 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5462 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5463 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5464 
5465     Level: developer
5466 
5467 */
5468 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5469 {
5470   VecScatter_MPI_General *gen_to,*gen_from;
5471   PetscErrorCode         ierr;
5472   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5473   Mat_SeqAIJ             *b_oth;
5474   VecScatter             ctx =a->Mvctx;
5475   MPI_Comm               comm;
5476   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5477   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5478   PetscScalar            *rvalues,*svalues;
5479   MatScalar              *b_otha,*bufa,*bufA;
5480   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5481   MPI_Request            *rwaits = NULL,*swaits = NULL;
5482   MPI_Status             *sstatus,rstatus;
5483   PetscMPIInt            jj;
5484   PetscInt               *cols,sbs,rbs;
5485   PetscScalar            *vals;
5486 
5487   PetscFunctionBegin;
5488   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5489   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5490     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5491   }
5492   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5493   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5494 
5495   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5496   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5497   rvalues  = gen_from->values; /* holds the length of receiving row */
5498   svalues  = gen_to->values;   /* holds the length of sending row */
5499   nrecvs   = gen_from->n;
5500   nsends   = gen_to->n;
5501 
5502   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5503   srow    = gen_to->indices;    /* local row index to be sent */
5504   sstarts = gen_to->starts;
5505   sprocs  = gen_to->procs;
5506   sstatus = gen_to->sstatus;
5507   sbs     = gen_to->bs;
5508   rstarts = gen_from->starts;
5509   rprocs  = gen_from->procs;
5510   rbs     = gen_from->bs;
5511 
5512   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5513   if (scall == MAT_INITIAL_MATRIX) {
5514     /* i-array */
5515     /*---------*/
5516     /*  post receives */
5517     for (i=0; i<nrecvs; i++) {
5518       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5519       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5520       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5521     }
5522 
5523     /* pack the outgoing message */
5524     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5525 
5526     sstartsj[0] = 0;
5527     rstartsj[0] = 0;
5528     len         = 0; /* total length of j or a array to be sent */
5529     k           = 0;
5530     for (i=0; i<nsends; i++) {
5531       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5532       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5533       for (j=0; j<nrows; j++) {
5534         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5535         for (l=0; l<sbs; l++) {
5536           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5537 
5538           rowlen[j*sbs+l] = ncols;
5539 
5540           len += ncols;
5541           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5542         }
5543         k++;
5544       }
5545       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5546 
5547       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5548     }
5549     /* recvs and sends of i-array are completed */
5550     i = nrecvs;
5551     while (i--) {
5552       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5553     }
5554     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5555 
5556     /* allocate buffers for sending j and a arrays */
5557     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5558     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5559 
5560     /* create i-array of B_oth */
5561     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5562 
5563     b_othi[0] = 0;
5564     len       = 0; /* total length of j or a array to be received */
5565     k         = 0;
5566     for (i=0; i<nrecvs; i++) {
5567       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5568       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5569       for (j=0; j<nrows; j++) {
5570         b_othi[k+1] = b_othi[k] + rowlen[j];
5571         len        += rowlen[j]; k++;
5572       }
5573       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5574     }
5575 
5576     /* allocate space for j and a arrrays of B_oth */
5577     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5578     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5579 
5580     /* j-array */
5581     /*---------*/
5582     /*  post receives of j-array */
5583     for (i=0; i<nrecvs; i++) {
5584       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5585       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5586     }
5587 
5588     /* pack the outgoing message j-array */
5589     k = 0;
5590     for (i=0; i<nsends; i++) {
5591       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5592       bufJ  = bufj+sstartsj[i];
5593       for (j=0; j<nrows; j++) {
5594         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5595         for (ll=0; ll<sbs; ll++) {
5596           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5597           for (l=0; l<ncols; l++) {
5598             *bufJ++ = cols[l];
5599           }
5600           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5601         }
5602       }
5603       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5604     }
5605 
5606     /* recvs and sends of j-array are completed */
5607     i = nrecvs;
5608     while (i--) {
5609       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5610     }
5611     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5612   } else if (scall == MAT_REUSE_MATRIX) {
5613     sstartsj = *startsj_s;
5614     rstartsj = *startsj_r;
5615     bufa     = *bufa_ptr;
5616     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5617     b_otha   = b_oth->a;
5618   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5619 
5620   /* a-array */
5621   /*---------*/
5622   /*  post receives of a-array */
5623   for (i=0; i<nrecvs; i++) {
5624     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5625     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5626   }
5627 
5628   /* pack the outgoing message a-array */
5629   k = 0;
5630   for (i=0; i<nsends; i++) {
5631     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5632     bufA  = bufa+sstartsj[i];
5633     for (j=0; j<nrows; j++) {
5634       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5635       for (ll=0; ll<sbs; ll++) {
5636         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5637         for (l=0; l<ncols; l++) {
5638           *bufA++ = vals[l];
5639         }
5640         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5641       }
5642     }
5643     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5644   }
5645   /* recvs and sends of a-array are completed */
5646   i = nrecvs;
5647   while (i--) {
5648     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5649   }
5650   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5651   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5652 
5653   if (scall == MAT_INITIAL_MATRIX) {
5654     /* put together the new matrix */
5655     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5656 
5657     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5658     /* Since these are PETSc arrays, change flags to free them as necessary. */
5659     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5660     b_oth->free_a  = PETSC_TRUE;
5661     b_oth->free_ij = PETSC_TRUE;
5662     b_oth->nonew   = 0;
5663 
5664     ierr = PetscFree(bufj);CHKERRQ(ierr);
5665     if (!startsj_s || !bufa_ptr) {
5666       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5667       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5668     } else {
5669       *startsj_s = sstartsj;
5670       *startsj_r = rstartsj;
5671       *bufa_ptr  = bufa;
5672     }
5673   }
5674   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5675   PetscFunctionReturn(0);
5676 }
5677 
5678 #undef __FUNCT__
5679 #define __FUNCT__ "MatGetCommunicationStructs"
5680 /*@C
5681   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5682 
5683   Not Collective
5684 
5685   Input Parameters:
5686 . A - The matrix in mpiaij format
5687 
5688   Output Parameter:
5689 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5690 . colmap - A map from global column index to local index into lvec
5691 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5692 
5693   Level: developer
5694 
5695 @*/
5696 #if defined(PETSC_USE_CTABLE)
5697 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5698 #else
5699 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5700 #endif
5701 {
5702   Mat_MPIAIJ *a;
5703 
5704   PetscFunctionBegin;
5705   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5706   PetscValidPointer(lvec, 2);
5707   PetscValidPointer(colmap, 3);
5708   PetscValidPointer(multScatter, 4);
5709   a = (Mat_MPIAIJ*) A->data;
5710   if (lvec) *lvec = a->lvec;
5711   if (colmap) *colmap = a->colmap;
5712   if (multScatter) *multScatter = a->Mvctx;
5713   PetscFunctionReturn(0);
5714 }
5715 
5716 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5717 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5718 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5719 
5720 #undef __FUNCT__
5721 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5722 /*
5723     Computes (B'*A')' since computing B*A directly is untenable
5724 
5725                n                       p                          p
5726         (              )       (              )         (                  )
5727       m (      A       )  *  n (       B      )   =   m (         C        )
5728         (              )       (              )         (                  )
5729 
5730 */
5731 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5732 {
5733   PetscErrorCode ierr;
5734   Mat            At,Bt,Ct;
5735 
5736   PetscFunctionBegin;
5737   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5738   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5739   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5740   ierr = MatDestroy(&At);CHKERRQ(ierr);
5741   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5742   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5743   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5744   PetscFunctionReturn(0);
5745 }
5746 
5747 #undef __FUNCT__
5748 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5749 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5750 {
5751   PetscErrorCode ierr;
5752   PetscInt       m=A->rmap->n,n=B->cmap->n;
5753   Mat            Cmat;
5754 
5755   PetscFunctionBegin;
5756   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5757   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5758   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5759   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5760   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5761   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5762   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5764 
5765   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5766 
5767   *C = Cmat;
5768   PetscFunctionReturn(0);
5769 }
5770 
5771 /* ----------------------------------------------------------------*/
5772 #undef __FUNCT__
5773 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5774 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5775 {
5776   PetscErrorCode ierr;
5777 
5778   PetscFunctionBegin;
5779   if (scall == MAT_INITIAL_MATRIX) {
5780     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5781     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5782     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5783   }
5784   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5785   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5786   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5787   PetscFunctionReturn(0);
5788 }
5789 
5790 #if defined(PETSC_HAVE_MUMPS)
5791 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5792 #endif
5793 #if defined(PETSC_HAVE_PASTIX)
5794 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5795 #endif
5796 #if defined(PETSC_HAVE_SUPERLU_DIST)
5797 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5798 #endif
5799 #if defined(PETSC_HAVE_CLIQUE)
5800 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5801 #endif
5802 
5803 /*MC
5804    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5805 
5806    Options Database Keys:
5807 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5808 
5809   Level: beginner
5810 
5811 .seealso: MatCreateAIJ()
5812 M*/
5813 
5814 #undef __FUNCT__
5815 #define __FUNCT__ "MatCreate_MPIAIJ"
5816 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5817 {
5818   Mat_MPIAIJ     *b;
5819   PetscErrorCode ierr;
5820   PetscMPIInt    size;
5821 
5822   PetscFunctionBegin;
5823   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5824 
5825   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5826   B->data       = (void*)b;
5827   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5828   B->assembled  = PETSC_FALSE;
5829   B->insertmode = NOT_SET_VALUES;
5830   b->size       = size;
5831 
5832   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5833 
5834   /* build cache for off array entries formed */
5835   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5836 
5837   b->donotstash  = PETSC_FALSE;
5838   b->colmap      = 0;
5839   b->garray      = 0;
5840   b->roworiented = PETSC_TRUE;
5841 
5842   /* stuff used for matrix vector multiply */
5843   b->lvec  = NULL;
5844   b->Mvctx = NULL;
5845 
5846   /* stuff for MatGetRow() */
5847   b->rowindices   = 0;
5848   b->rowvalues    = 0;
5849   b->getrowactive = PETSC_FALSE;
5850 
5851   /* flexible pointer used in CUSP/CUSPARSE classes */
5852   b->spptr = NULL;
5853 
5854 #if defined(PETSC_HAVE_MUMPS)
5855   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5856 #endif
5857 #if defined(PETSC_HAVE_PASTIX)
5858   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5859 #endif
5860 #if defined(PETSC_HAVE_SUPERLU_DIST)
5861   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5862 #endif
5863 #if defined(PETSC_HAVE_CLIQUE)
5864   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5865 #endif
5866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5870   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5880   PetscFunctionReturn(0);
5881 }
5882 
5883 #undef __FUNCT__
5884 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5885 /*@
5886      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5887          and "off-diagonal" part of the matrix in CSR format.
5888 
5889    Collective on MPI_Comm
5890 
5891    Input Parameters:
5892 +  comm - MPI communicator
5893 .  m - number of local rows (Cannot be PETSC_DECIDE)
5894 .  n - This value should be the same as the local size used in creating the
5895        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5896        calculated if N is given) For square matrices n is almost always m.
5897 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5898 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5899 .   i - row indices for "diagonal" portion of matrix
5900 .   j - column indices
5901 .   a - matrix values
5902 .   oi - row indices for "off-diagonal" portion of matrix
5903 .   oj - column indices
5904 -   oa - matrix values
5905 
5906    Output Parameter:
5907 .   mat - the matrix
5908 
5909    Level: advanced
5910 
5911    Notes:
5912        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5913        must free the arrays once the matrix has been destroyed and not before.
5914 
5915        The i and j indices are 0 based
5916 
5917        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5918 
5919        This sets local rows and cannot be used to set off-processor values.
5920 
5921        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5922        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5923        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5924        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5925        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5926        communication if it is known that only local entries will be set.
5927 
5928 .keywords: matrix, aij, compressed row, sparse, parallel
5929 
5930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5931           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5932 @*/
5933 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5934 {
5935   PetscErrorCode ierr;
5936   Mat_MPIAIJ     *maij;
5937 
5938   PetscFunctionBegin;
5939   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5940   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5941   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5942   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5943   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5944   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5945   maij = (Mat_MPIAIJ*) (*mat)->data;
5946 
5947   (*mat)->preallocated = PETSC_TRUE;
5948 
5949   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5950   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5951 
5952   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5953   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5954 
5955   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5956   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5957   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5958   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5959 
5960   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5961   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5962   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5963   PetscFunctionReturn(0);
5964 }
5965 
5966 /*
5967     Special version for direct calls from Fortran
5968 */
5969 #include <petsc-private/fortranimpl.h>
5970 
5971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5972 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5974 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5975 #endif
5976 
5977 /* Change these macros so can be used in void function */
5978 #undef CHKERRQ
5979 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5980 #undef SETERRQ2
5981 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5982 #undef SETERRQ3
5983 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5984 #undef SETERRQ
5985 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5986 
5987 #undef __FUNCT__
5988 #define __FUNCT__ "matsetvaluesmpiaij_"
5989 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5990 {
5991   Mat            mat  = *mmat;
5992   PetscInt       m    = *mm, n = *mn;
5993   InsertMode     addv = *maddv;
5994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5995   PetscScalar    value;
5996   PetscErrorCode ierr;
5997 
5998   MatCheckPreallocated(mat,1);
5999   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6000 
6001 #if defined(PETSC_USE_DEBUG)
6002   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6003 #endif
6004   {
6005     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6006     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6007     PetscBool roworiented = aij->roworiented;
6008 
6009     /* Some Variables required in the macro */
6010     Mat        A                 = aij->A;
6011     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6012     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6013     MatScalar  *aa               = a->a;
6014     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6015     Mat        B                 = aij->B;
6016     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6017     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6018     MatScalar  *ba               = b->a;
6019 
6020     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6021     PetscInt  nonew = a->nonew;
6022     MatScalar *ap1,*ap2;
6023 
6024     PetscFunctionBegin;
6025     for (i=0; i<m; i++) {
6026       if (im[i] < 0) continue;
6027 #if defined(PETSC_USE_DEBUG)
6028       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6029 #endif
6030       if (im[i] >= rstart && im[i] < rend) {
6031         row      = im[i] - rstart;
6032         lastcol1 = -1;
6033         rp1      = aj + ai[row];
6034         ap1      = aa + ai[row];
6035         rmax1    = aimax[row];
6036         nrow1    = ailen[row];
6037         low1     = 0;
6038         high1    = nrow1;
6039         lastcol2 = -1;
6040         rp2      = bj + bi[row];
6041         ap2      = ba + bi[row];
6042         rmax2    = bimax[row];
6043         nrow2    = bilen[row];
6044         low2     = 0;
6045         high2    = nrow2;
6046 
6047         for (j=0; j<n; j++) {
6048           if (roworiented) value = v[i*n+j];
6049           else value = v[i+j*m];
6050           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6051           if (in[j] >= cstart && in[j] < cend) {
6052             col = in[j] - cstart;
6053             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6054           } else if (in[j] < 0) continue;
6055 #if defined(PETSC_USE_DEBUG)
6056           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6057 #endif
6058           else {
6059             if (mat->was_assembled) {
6060               if (!aij->colmap) {
6061                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6062               }
6063 #if defined(PETSC_USE_CTABLE)
6064               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6065               col--;
6066 #else
6067               col = aij->colmap[in[j]] - 1;
6068 #endif
6069               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6070                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6071                 col  =  in[j];
6072                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6073                 B     = aij->B;
6074                 b     = (Mat_SeqAIJ*)B->data;
6075                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6076                 rp2   = bj + bi[row];
6077                 ap2   = ba + bi[row];
6078                 rmax2 = bimax[row];
6079                 nrow2 = bilen[row];
6080                 low2  = 0;
6081                 high2 = nrow2;
6082                 bm    = aij->B->rmap->n;
6083                 ba    = b->a;
6084               }
6085             } else col = in[j];
6086             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6087           }
6088         }
6089       } else if (!aij->donotstash) {
6090         if (roworiented) {
6091           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6092         } else {
6093           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6094         }
6095       }
6096     }
6097   }
6098   PetscFunctionReturnVoid();
6099 }
6100 
6101