xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b3a4ddeeec3842067d4f44a13811ed344c305470)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537 
2538     if (redund->psubcomm) {
2539       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2540     }
2541 
2542     ierr = redund->Destroy(A);CHKERRQ(ierr);
2543     ierr = PetscFree(redund);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2551 {
2552   PetscMPIInt    rank,size;
2553   MPI_Comm       comm,subcomm=psubcomm->comm;
2554   PetscErrorCode ierr;
2555   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2556   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2557   PetscInt       *rowrange = mat->rmap->range;
2558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2559   Mat            A = aij->A,B=aij->B,C=*matredundant;
2560   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2561   PetscScalar    *sbuf_a;
2562   PetscInt       nzlocal=a->nz+b->nz;
2563   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2564   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2565   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2566   MatScalar      *aworkA,*aworkB;
2567   PetscScalar    *vals;
2568   PetscMPIInt    tag1,tag2,tag3,imdex;
2569   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2570   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2571   MPI_Status     recv_status,*send_status;
2572   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2573   PetscInt       **rbuf_j=NULL;
2574   PetscScalar    **rbuf_a=NULL;
2575   Mat_Redundant  *redund =NULL;
2576 
2577   PetscFunctionBegin;
2578   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2579   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2581   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2582   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2583 
2584   if (reuse == MAT_REUSE_MATRIX) {
2585     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2586     if (subsize == 1) {
2587       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2588       redund = c->redundant;
2589     } else {
2590       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2591       redund = c->redundant;
2592     }
2593     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2594 
2595     nsends    = redund->nsends;
2596     nrecvs    = redund->nrecvs;
2597     send_rank = redund->send_rank;
2598     recv_rank = redund->recv_rank;
2599     sbuf_nz   = redund->sbuf_nz;
2600     rbuf_nz   = redund->rbuf_nz;
2601     sbuf_j    = redund->sbuf_j;
2602     sbuf_a    = redund->sbuf_a;
2603     rbuf_j    = redund->rbuf_j;
2604     rbuf_a    = redund->rbuf_a;
2605   }
2606 
2607   if (reuse == MAT_INITIAL_MATRIX) {
2608     PetscInt    nleftover,np_subcomm;
2609 
2610     /* get the destination processors' id send_rank, nsends and nrecvs */
2611     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2612 
2613     np_subcomm = size/nsubcomm;
2614     nleftover  = size - nsubcomm*np_subcomm;
2615 
2616     nsends = 0; nrecvs = 0;
2617     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2618       /* -------------------------------------------*/
2619       for (i=0; i<size; i++) {
2620         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2621           send_rank[nsends]   = i; nsends++;
2622           recv_rank[nrecvs++] = i;
2623           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2624         }
2625       }
2626       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2627         i = size-nleftover-1;
2628         j = 0;
2629         while (j < nsubcomm - nleftover) {
2630           send_rank[nsends++] = i;
2631           i--; j++;
2632           /* printf("[%d] send to [%d]\n",rank,i); */
2633         }
2634       }
2635 
2636       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2637         for (i=0; i<nleftover; i++) {
2638           recv_rank[nrecvs++] = size-nleftover+i;
2639           /* printf("[%d] recv from [%d]\n",rank,i); */
2640         }
2641       }
2642     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2643       /* --------------------------------------------------*/
2644       PetscInt color,subcommstart;
2645       subcommstart=0;
2646       for (color=0; color<nsubcomm; color++) {
2647         if (psubcomm->color != color) {
2648           for (i=0; i<psubcomm->subsize[color]; i++) {
2649             if (subrank == i) { /* my_subrank == other's subrank */
2650               send_rank[nsends++] = subcommstart+i;
2651               recv_rank[nrecvs++] = subcommstart+i;
2652               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2653             }
2654           }
2655         }
2656         subcommstart += psubcomm->subsize[color];
2657       }
2658       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2659         subcommstart=0;
2660         for (color=0; color<nsubcomm; color++) {
2661           subcommstart += psubcomm->subsize[color];
2662           if (psubcomm->color == color) continue;
2663           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2664             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2665             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2666           }
2667         }
2668       }
2669 
2670       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2671         subcommstart=0;
2672         for (color=0; color<nsubcomm; color++) {
2673           subcommstart += psubcomm->subsize[color];
2674           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2675             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2676             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2677           }
2678         }
2679       }
2680     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2681 
2682     /* allocate sbuf_j, sbuf_a */
2683     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2684     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2685     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2686     /*
2687     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2688     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2689      */
2690   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2691 
2692   /* copy mat's local entries into the buffers */
2693   if (reuse == MAT_INITIAL_MATRIX) {
2694     rownz_max = 0;
2695     rptr      = sbuf_j;
2696     cols      = sbuf_j + rend-rstart + 1;
2697     vals      = sbuf_a;
2698     rptr[0]   = 0;
2699     for (i=0; i<rend-rstart; i++) {
2700       row    = i + rstart;
2701       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2702       ncols  = nzA + nzB;
2703       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2704       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2705       /* load the column indices for this row into cols */
2706       lwrite = 0;
2707       for (l=0; l<nzB; l++) {
2708         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2709           vals[lwrite]   = aworkB[l];
2710           cols[lwrite++] = ctmp;
2711         }
2712       }
2713       for (l=0; l<nzA; l++) {
2714         vals[lwrite]   = aworkA[l];
2715         cols[lwrite++] = cstart + cworkA[l];
2716       }
2717       for (l=0; l<nzB; l++) {
2718         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2719           vals[lwrite]   = aworkB[l];
2720           cols[lwrite++] = ctmp;
2721         }
2722       }
2723       vals     += ncols;
2724       cols     += ncols;
2725       rptr[i+1] = rptr[i] + ncols;
2726       if (rownz_max < ncols) rownz_max = ncols;
2727     }
2728     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2729   } else { /* only copy matrix values into sbuf_a */
2730     rptr    = sbuf_j;
2731     vals    = sbuf_a;
2732     rptr[0] = 0;
2733     for (i=0; i<rend-rstart; i++) {
2734       row    = i + rstart;
2735       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2736       ncols  = nzA + nzB;
2737       cworkB = b->j + b->i[i];
2738       aworkA = a->a + a->i[i];
2739       aworkB = b->a + b->i[i];
2740       lwrite = 0;
2741       for (l=0; l<nzB; l++) {
2742         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2743       }
2744       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2745       for (l=0; l<nzB; l++) {
2746         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2747       }
2748       vals     += ncols;
2749       rptr[i+1] = rptr[i] + ncols;
2750     }
2751   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2752 
2753   /* send nzlocal to others, and recv other's nzlocal */
2754   /*--------------------------------------------------*/
2755   if (reuse == MAT_INITIAL_MATRIX) {
2756     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2757 
2758     s_waits2 = s_waits3 + nsends;
2759     s_waits1 = s_waits2 + nsends;
2760     r_waits1 = s_waits1 + nsends;
2761     r_waits2 = r_waits1 + nrecvs;
2762     r_waits3 = r_waits2 + nrecvs;
2763   } else {
2764     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2765 
2766     r_waits3 = s_waits3 + nsends;
2767   }
2768 
2769   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2770   if (reuse == MAT_INITIAL_MATRIX) {
2771     /* get new tags to keep the communication clean */
2772     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2773     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2774     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2775 
2776     /* post receives of other's nzlocal */
2777     for (i=0; i<nrecvs; i++) {
2778       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2779     }
2780     /* send nzlocal to others */
2781     for (i=0; i<nsends; i++) {
2782       sbuf_nz[i] = nzlocal;
2783       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2784     }
2785     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2786     count = nrecvs;
2787     while (count) {
2788       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2789 
2790       recv_rank[imdex] = recv_status.MPI_SOURCE;
2791       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2792       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2793 
2794       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2795 
2796       rbuf_nz[imdex] += i + 2;
2797 
2798       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2799       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2800       count--;
2801     }
2802     /* wait on sends of nzlocal */
2803     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2804     /* send mat->i,j to others, and recv from other's */
2805     /*------------------------------------------------*/
2806     for (i=0; i<nsends; i++) {
2807       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2808       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2809     }
2810     /* wait on receives of mat->i,j */
2811     /*------------------------------*/
2812     count = nrecvs;
2813     while (count) {
2814       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2815       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2816       count--;
2817     }
2818     /* wait on sends of mat->i,j */
2819     /*---------------------------*/
2820     if (nsends) {
2821       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2822     }
2823   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2824 
2825   /* post receives, send and receive mat->a */
2826   /*----------------------------------------*/
2827   for (imdex=0; imdex<nrecvs; imdex++) {
2828     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2829   }
2830   for (i=0; i<nsends; i++) {
2831     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2832   }
2833   count = nrecvs;
2834   while (count) {
2835     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2836     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2837     count--;
2838   }
2839   if (nsends) {
2840     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2841   }
2842 
2843   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2844 
2845   /* create redundant matrix */
2846   /*-------------------------*/
2847   if (reuse == MAT_INITIAL_MATRIX) {
2848     const PetscInt *range;
2849     PetscInt       rstart_sub,rend_sub,mloc_sub;
2850 
2851     /* compute rownz_max for preallocation */
2852     for (imdex=0; imdex<nrecvs; imdex++) {
2853       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2854       rptr = rbuf_j[imdex];
2855       for (i=0; i<j; i++) {
2856         ncols = rptr[i+1] - rptr[i];
2857         if (rownz_max < ncols) rownz_max = ncols;
2858       }
2859     }
2860 
2861     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2862 
2863     /* get local size of redundant matrix
2864        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2865     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2866     rstart_sub = range[nsubcomm*subrank];
2867     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2868       rend_sub = range[nsubcomm*(subrank+1)];
2869     } else {
2870       rend_sub = mat->rmap->N;
2871     }
2872     mloc_sub = rend_sub - rstart_sub;
2873 
2874     if (M == N) {
2875       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2876     } else { /* non-square matrix */
2877       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2878     }
2879     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2880     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2881     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2882     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2883   } else {
2884     C = *matredundant;
2885   }
2886 
2887   /* insert local matrix entries */
2888   rptr = sbuf_j;
2889   cols = sbuf_j + rend-rstart + 1;
2890   vals = sbuf_a;
2891   for (i=0; i<rend-rstart; i++) {
2892     row   = i + rstart;
2893     ncols = rptr[i+1] - rptr[i];
2894     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2895     vals += ncols;
2896     cols += ncols;
2897   }
2898   /* insert received matrix entries */
2899   for (imdex=0; imdex<nrecvs; imdex++) {
2900     rstart = rowrange[recv_rank[imdex]];
2901     rend   = rowrange[recv_rank[imdex]+1];
2902     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2903     rptr   = rbuf_j[imdex];
2904     cols   = rbuf_j[imdex] + rend-rstart + 1;
2905     vals   = rbuf_a[imdex];
2906     for (i=0; i<rend-rstart; i++) {
2907       row   = i + rstart;
2908       ncols = rptr[i+1] - rptr[i];
2909       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2910       vals += ncols;
2911       cols += ncols;
2912     }
2913   }
2914   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2915   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2916 
2917   if (reuse == MAT_INITIAL_MATRIX) {
2918     *matredundant = C;
2919 
2920     /* create a supporting struct and attach it to C for reuse */
2921     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2922     if (subsize == 1) {
2923       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2924       c->redundant = redund;
2925     } else {
2926       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2927       c->redundant = redund;
2928     }
2929 
2930     redund->nzlocal   = nzlocal;
2931     redund->nsends    = nsends;
2932     redund->nrecvs    = nrecvs;
2933     redund->send_rank = send_rank;
2934     redund->recv_rank = recv_rank;
2935     redund->sbuf_nz   = sbuf_nz;
2936     redund->rbuf_nz   = rbuf_nz;
2937     redund->sbuf_j    = sbuf_j;
2938     redund->sbuf_a    = sbuf_a;
2939     redund->rbuf_j    = rbuf_j;
2940     redund->rbuf_a    = rbuf_a;
2941     redund->psubcomm  = NULL;
2942 
2943     redund->Destroy = C->ops->destroy;
2944     C->ops->destroy = MatDestroy_MatRedundant;
2945   }
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2951 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2952 {
2953   PetscErrorCode ierr;
2954 
2955   PetscFunctionBegin;
2956   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
2957     MPI_Comm       comm;
2958     PetscSubcomm   psubcomm;
2959     PetscMPIInt    size,subsize;
2960 
2961     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2962     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2963     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2964     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2965     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2966     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2967 
2968     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2969 
2970     /* free psubcomm in MatDestroy_MatRedundant() */
2971     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2972     Mat C = *matredundant;
2973     if (subsize == 1) {
2974       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2975       c->redundant->psubcomm = psubcomm;
2976     } else {
2977       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2978       c->redundant->psubcomm = psubcomm ;
2979     }
2980   } else {
2981     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2982   }
2983   PetscFunctionReturn(0);
2984 }
2985 
2986 #undef __FUNCT__
2987 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2988 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2989 {
2990   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2991   PetscErrorCode ierr;
2992   PetscInt       i,*idxb = 0;
2993   PetscScalar    *va,*vb;
2994   Vec            vtmp;
2995 
2996   PetscFunctionBegin;
2997   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2998   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2999   if (idx) {
3000     for (i=0; i<A->rmap->n; i++) {
3001       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3002     }
3003   }
3004 
3005   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3006   if (idx) {
3007     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3008   }
3009   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3010   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3011 
3012   for (i=0; i<A->rmap->n; i++) {
3013     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3014       va[i] = vb[i];
3015       if (idx) idx[i] = a->garray[idxb[i]];
3016     }
3017   }
3018 
3019   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3020   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3021   ierr = PetscFree(idxb);CHKERRQ(ierr);
3022   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 
3026 #undef __FUNCT__
3027 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3028 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3029 {
3030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3031   PetscErrorCode ierr;
3032   PetscInt       i,*idxb = 0;
3033   PetscScalar    *va,*vb;
3034   Vec            vtmp;
3035 
3036   PetscFunctionBegin;
3037   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3038   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3039   if (idx) {
3040     for (i=0; i<A->cmap->n; i++) {
3041       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3042     }
3043   }
3044 
3045   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3046   if (idx) {
3047     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3048   }
3049   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3050   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3051 
3052   for (i=0; i<A->rmap->n; i++) {
3053     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3054       va[i] = vb[i];
3055       if (idx) idx[i] = a->garray[idxb[i]];
3056     }
3057   }
3058 
3059   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3060   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3061   ierr = PetscFree(idxb);CHKERRQ(ierr);
3062   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3068 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3069 {
3070   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3071   PetscInt       n      = A->rmap->n;
3072   PetscInt       cstart = A->cmap->rstart;
3073   PetscInt       *cmap  = mat->garray;
3074   PetscInt       *diagIdx, *offdiagIdx;
3075   Vec            diagV, offdiagV;
3076   PetscScalar    *a, *diagA, *offdiagA;
3077   PetscInt       r;
3078   PetscErrorCode ierr;
3079 
3080   PetscFunctionBegin;
3081   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3082   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3083   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3084   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3085   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3086   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3087   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3088   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3089   for (r = 0; r < n; ++r) {
3090     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3091       a[r]   = diagA[r];
3092       idx[r] = cstart + diagIdx[r];
3093     } else {
3094       a[r]   = offdiagA[r];
3095       idx[r] = cmap[offdiagIdx[r]];
3096     }
3097   }
3098   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3099   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3100   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3101   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3102   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3103   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 #undef __FUNCT__
3108 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3109 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3110 {
3111   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3112   PetscInt       n      = A->rmap->n;
3113   PetscInt       cstart = A->cmap->rstart;
3114   PetscInt       *cmap  = mat->garray;
3115   PetscInt       *diagIdx, *offdiagIdx;
3116   Vec            diagV, offdiagV;
3117   PetscScalar    *a, *diagA, *offdiagA;
3118   PetscInt       r;
3119   PetscErrorCode ierr;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3123   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3124   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3125   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3126   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3127   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3128   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3129   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3130   for (r = 0; r < n; ++r) {
3131     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3132       a[r]   = diagA[r];
3133       idx[r] = cstart + diagIdx[r];
3134     } else {
3135       a[r]   = offdiagA[r];
3136       idx[r] = cmap[offdiagIdx[r]];
3137     }
3138   }
3139   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3140   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3141   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3142   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3143   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3144   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3145   PetscFunctionReturn(0);
3146 }
3147 
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3150 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3151 {
3152   PetscErrorCode ierr;
3153   Mat            *dummy;
3154 
3155   PetscFunctionBegin;
3156   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3157   *newmat = *dummy;
3158   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3166 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3167 {
3168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3169   PetscErrorCode ierr;
3170 
3171   PetscFunctionBegin;
3172   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3178 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3179 {
3180   PetscErrorCode ierr;
3181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3182 
3183   PetscFunctionBegin;
3184   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3185   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3186   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3187   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 /* -------------------------------------------------------------------*/
3192 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3193                                        MatGetRow_MPIAIJ,
3194                                        MatRestoreRow_MPIAIJ,
3195                                        MatMult_MPIAIJ,
3196                                 /* 4*/ MatMultAdd_MPIAIJ,
3197                                        MatMultTranspose_MPIAIJ,
3198                                        MatMultTransposeAdd_MPIAIJ,
3199 #if defined(PETSC_HAVE_PBGL)
3200                                        MatSolve_MPIAIJ,
3201 #else
3202                                        0,
3203 #endif
3204                                        0,
3205                                        0,
3206                                 /*10*/ 0,
3207                                        0,
3208                                        0,
3209                                        MatSOR_MPIAIJ,
3210                                        MatTranspose_MPIAIJ,
3211                                 /*15*/ MatGetInfo_MPIAIJ,
3212                                        MatEqual_MPIAIJ,
3213                                        MatGetDiagonal_MPIAIJ,
3214                                        MatDiagonalScale_MPIAIJ,
3215                                        MatNorm_MPIAIJ,
3216                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3217                                        MatAssemblyEnd_MPIAIJ,
3218                                        MatSetOption_MPIAIJ,
3219                                        MatZeroEntries_MPIAIJ,
3220                                 /*24*/ MatZeroRows_MPIAIJ,
3221                                        0,
3222 #if defined(PETSC_HAVE_PBGL)
3223                                        0,
3224 #else
3225                                        0,
3226 #endif
3227                                        0,
3228                                        0,
3229                                 /*29*/ MatSetUp_MPIAIJ,
3230 #if defined(PETSC_HAVE_PBGL)
3231                                        0,
3232 #else
3233                                        0,
3234 #endif
3235                                        0,
3236                                        0,
3237                                        0,
3238                                 /*34*/ MatDuplicate_MPIAIJ,
3239                                        0,
3240                                        0,
3241                                        0,
3242                                        0,
3243                                 /*39*/ MatAXPY_MPIAIJ,
3244                                        MatGetSubMatrices_MPIAIJ,
3245                                        MatIncreaseOverlap_MPIAIJ,
3246                                        MatGetValues_MPIAIJ,
3247                                        MatCopy_MPIAIJ,
3248                                 /*44*/ MatGetRowMax_MPIAIJ,
3249                                        MatScale_MPIAIJ,
3250                                        0,
3251                                        0,
3252                                        MatZeroRowsColumns_MPIAIJ,
3253                                 /*49*/ MatSetRandom_MPIAIJ,
3254                                        0,
3255                                        0,
3256                                        0,
3257                                        0,
3258                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3259                                        0,
3260                                        MatSetUnfactored_MPIAIJ,
3261                                        MatPermute_MPIAIJ,
3262                                        0,
3263                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3264                                        MatDestroy_MPIAIJ,
3265                                        MatView_MPIAIJ,
3266                                        0,
3267                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3268                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3269                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3270                                        0,
3271                                        0,
3272                                        0,
3273                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3274                                        MatGetRowMinAbs_MPIAIJ,
3275                                        0,
3276                                        MatSetColoring_MPIAIJ,
3277                                        0,
3278                                        MatSetValuesAdifor_MPIAIJ,
3279                                 /*75*/ MatFDColoringApply_AIJ,
3280                                        0,
3281                                        0,
3282                                        0,
3283                                        MatFindZeroDiagonals_MPIAIJ,
3284                                 /*80*/ 0,
3285                                        0,
3286                                        0,
3287                                 /*83*/ MatLoad_MPIAIJ,
3288                                        0,
3289                                        0,
3290                                        0,
3291                                        0,
3292                                        0,
3293                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3294                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3295                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3296                                        MatPtAP_MPIAIJ_MPIAIJ,
3297                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3298                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3299                                        0,
3300                                        0,
3301                                        0,
3302                                        0,
3303                                 /*99*/ 0,
3304                                        0,
3305                                        0,
3306                                        MatConjugate_MPIAIJ,
3307                                        0,
3308                                 /*104*/MatSetValuesRow_MPIAIJ,
3309                                        MatRealPart_MPIAIJ,
3310                                        MatImaginaryPart_MPIAIJ,
3311                                        0,
3312                                        0,
3313                                 /*109*/0,
3314                                        MatGetRedundantMatrix_MPIAIJ,
3315                                        MatGetRowMin_MPIAIJ,
3316                                        0,
3317                                        0,
3318                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3319                                        0,
3320                                        0,
3321                                        0,
3322                                        0,
3323                                 /*119*/0,
3324                                        0,
3325                                        0,
3326                                        0,
3327                                        MatGetMultiProcBlock_MPIAIJ,
3328                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3329                                        MatGetColumnNorms_MPIAIJ,
3330                                        MatInvertBlockDiagonal_MPIAIJ,
3331                                        0,
3332                                        MatGetSubMatricesParallel_MPIAIJ,
3333                                 /*129*/0,
3334                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3335                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3336                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3337                                        0,
3338                                 /*134*/0,
3339                                        0,
3340                                        0,
3341                                        0,
3342                                        0,
3343                                 /*139*/0,
3344                                        0
3345 };
3346 
3347 /* ----------------------------------------------------------------------------------------*/
3348 
3349 #undef __FUNCT__
3350 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3351 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3352 {
3353   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3358   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3364 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3365 {
3366   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3367   PetscErrorCode ierr;
3368 
3369   PetscFunctionBegin;
3370   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3371   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3377 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3378 {
3379   Mat_MPIAIJ     *b;
3380   PetscErrorCode ierr;
3381 
3382   PetscFunctionBegin;
3383   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3384   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3385   b = (Mat_MPIAIJ*)B->data;
3386 
3387   if (!B->preallocated) {
3388     /* Explicitly create 2 MATSEQAIJ matrices. */
3389     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3390     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3391     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3392     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3393     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3394     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3395     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3396     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3397     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3398     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3399   }
3400 
3401   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3402   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3403   B->preallocated = PETSC_TRUE;
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3409 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3410 {
3411   Mat            mat;
3412   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3413   PetscErrorCode ierr;
3414 
3415   PetscFunctionBegin;
3416   *newmat = 0;
3417   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3418   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3419   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3420   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3421   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3422   a       = (Mat_MPIAIJ*)mat->data;
3423 
3424   mat->factortype   = matin->factortype;
3425   mat->rmap->bs     = matin->rmap->bs;
3426   mat->cmap->bs     = matin->cmap->bs;
3427   mat->assembled    = PETSC_TRUE;
3428   mat->insertmode   = NOT_SET_VALUES;
3429   mat->preallocated = PETSC_TRUE;
3430 
3431   a->size         = oldmat->size;
3432   a->rank         = oldmat->rank;
3433   a->donotstash   = oldmat->donotstash;
3434   a->roworiented  = oldmat->roworiented;
3435   a->rowindices   = 0;
3436   a->rowvalues    = 0;
3437   a->getrowactive = PETSC_FALSE;
3438 
3439   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3440   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3441 
3442   if (oldmat->colmap) {
3443 #if defined(PETSC_USE_CTABLE)
3444     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3445 #else
3446     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3447     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3448     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3449 #endif
3450   } else a->colmap = 0;
3451   if (oldmat->garray) {
3452     PetscInt len;
3453     len  = oldmat->B->cmap->n;
3454     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3455     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3456     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3457   } else a->garray = 0;
3458 
3459   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3460   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3461   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3462   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3463   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3464   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3465   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3466   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3467   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3468   *newmat = mat;
3469   PetscFunctionReturn(0);
3470 }
3471 
3472 
3473 
3474 #undef __FUNCT__
3475 #define __FUNCT__ "MatLoad_MPIAIJ"
3476 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3477 {
3478   PetscScalar    *vals,*svals;
3479   MPI_Comm       comm;
3480   PetscErrorCode ierr;
3481   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3482   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3483   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3484   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3485   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3486   int            fd;
3487   PetscInt       bs = 1;
3488 
3489   PetscFunctionBegin;
3490   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3491   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3492   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3493   if (!rank) {
3494     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3495     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3496     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3497   }
3498 
3499   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3500   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3501   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3502 
3503   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3504 
3505   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3506   M    = header[1]; N = header[2];
3507   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3508   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3509   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3510 
3511   /* If global sizes are set, check if they are consistent with that given in the file */
3512   if (sizesset) {
3513     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3514   }
3515   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3516   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3517 
3518   /* determine ownership of all (block) rows */
3519   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3520   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3521   else m = newMat->rmap->n; /* Set by user */
3522 
3523   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3524   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3525 
3526   /* First process needs enough room for process with most rows */
3527   if (!rank) {
3528     mmax = rowners[1];
3529     for (i=2; i<=size; i++) {
3530       mmax = PetscMax(mmax, rowners[i]);
3531     }
3532   } else mmax = -1;             /* unused, but compilers complain */
3533 
3534   rowners[0] = 0;
3535   for (i=2; i<=size; i++) {
3536     rowners[i] += rowners[i-1];
3537   }
3538   rstart = rowners[rank];
3539   rend   = rowners[rank+1];
3540 
3541   /* distribute row lengths to all processors */
3542   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3543   if (!rank) {
3544     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3545     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3546     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3547     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3548     for (j=0; j<m; j++) {
3549       procsnz[0] += ourlens[j];
3550     }
3551     for (i=1; i<size; i++) {
3552       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3553       /* calculate the number of nonzeros on each processor */
3554       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3555         procsnz[i] += rowlengths[j];
3556       }
3557       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3558     }
3559     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3560   } else {
3561     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3562   }
3563 
3564   if (!rank) {
3565     /* determine max buffer needed and allocate it */
3566     maxnz = 0;
3567     for (i=0; i<size; i++) {
3568       maxnz = PetscMax(maxnz,procsnz[i]);
3569     }
3570     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3571 
3572     /* read in my part of the matrix column indices  */
3573     nz   = procsnz[0];
3574     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3575     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3576 
3577     /* read in every one elses and ship off */
3578     for (i=1; i<size; i++) {
3579       nz   = procsnz[i];
3580       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3581       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3582     }
3583     ierr = PetscFree(cols);CHKERRQ(ierr);
3584   } else {
3585     /* determine buffer space needed for message */
3586     nz = 0;
3587     for (i=0; i<m; i++) {
3588       nz += ourlens[i];
3589     }
3590     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3591 
3592     /* receive message of column indices*/
3593     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3594   }
3595 
3596   /* determine column ownership if matrix is not square */
3597   if (N != M) {
3598     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3599     else n = newMat->cmap->n;
3600     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3601     cstart = cend - n;
3602   } else {
3603     cstart = rstart;
3604     cend   = rend;
3605     n      = cend - cstart;
3606   }
3607 
3608   /* loop over local rows, determining number of off diagonal entries */
3609   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3610   jj   = 0;
3611   for (i=0; i<m; i++) {
3612     for (j=0; j<ourlens[i]; j++) {
3613       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3614       jj++;
3615     }
3616   }
3617 
3618   for (i=0; i<m; i++) {
3619     ourlens[i] -= offlens[i];
3620   }
3621   if (!sizesset) {
3622     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3623   }
3624 
3625   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3626 
3627   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3628 
3629   for (i=0; i<m; i++) {
3630     ourlens[i] += offlens[i];
3631   }
3632 
3633   if (!rank) {
3634     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3635 
3636     /* read in my part of the matrix numerical values  */
3637     nz   = procsnz[0];
3638     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3639 
3640     /* insert into matrix */
3641     jj      = rstart;
3642     smycols = mycols;
3643     svals   = vals;
3644     for (i=0; i<m; i++) {
3645       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3646       smycols += ourlens[i];
3647       svals   += ourlens[i];
3648       jj++;
3649     }
3650 
3651     /* read in other processors and ship out */
3652     for (i=1; i<size; i++) {
3653       nz   = procsnz[i];
3654       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3655       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3656     }
3657     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3658   } else {
3659     /* receive numeric values */
3660     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3661 
3662     /* receive message of values*/
3663     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3664 
3665     /* insert into matrix */
3666     jj      = rstart;
3667     smycols = mycols;
3668     svals   = vals;
3669     for (i=0; i<m; i++) {
3670       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3671       smycols += ourlens[i];
3672       svals   += ourlens[i];
3673       jj++;
3674     }
3675   }
3676   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3677   ierr = PetscFree(vals);CHKERRQ(ierr);
3678   ierr = PetscFree(mycols);CHKERRQ(ierr);
3679   ierr = PetscFree(rowners);CHKERRQ(ierr);
3680   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3681   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3687 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3688 {
3689   PetscErrorCode ierr;
3690   IS             iscol_local;
3691   PetscInt       csize;
3692 
3693   PetscFunctionBegin;
3694   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3695   if (call == MAT_REUSE_MATRIX) {
3696     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3697     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3698   } else {
3699     PetscInt cbs;
3700     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3701     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3702     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3703   }
3704   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3705   if (call == MAT_INITIAL_MATRIX) {
3706     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3707     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3708   }
3709   PetscFunctionReturn(0);
3710 }
3711 
3712 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3713 #undef __FUNCT__
3714 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3715 /*
3716     Not great since it makes two copies of the submatrix, first an SeqAIJ
3717   in local and then by concatenating the local matrices the end result.
3718   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3719 
3720   Note: This requires a sequential iscol with all indices.
3721 */
3722 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3723 {
3724   PetscErrorCode ierr;
3725   PetscMPIInt    rank,size;
3726   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3727   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3728   PetscBool      allcolumns, colflag;
3729   Mat            M,Mreuse;
3730   MatScalar      *vwork,*aa;
3731   MPI_Comm       comm;
3732   Mat_SeqAIJ     *aij;
3733 
3734   PetscFunctionBegin;
3735   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3736   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3737   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3738 
3739   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3740   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3741   if (colflag && ncol == mat->cmap->N) {
3742     allcolumns = PETSC_TRUE;
3743   } else {
3744     allcolumns = PETSC_FALSE;
3745   }
3746   if (call ==  MAT_REUSE_MATRIX) {
3747     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3748     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3749     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3750   } else {
3751     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3752   }
3753 
3754   /*
3755       m - number of local rows
3756       n - number of columns (same on all processors)
3757       rstart - first row in new global matrix generated
3758   */
3759   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3760   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3761   if (call == MAT_INITIAL_MATRIX) {
3762     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3763     ii  = aij->i;
3764     jj  = aij->j;
3765 
3766     /*
3767         Determine the number of non-zeros in the diagonal and off-diagonal
3768         portions of the matrix in order to do correct preallocation
3769     */
3770 
3771     /* first get start and end of "diagonal" columns */
3772     if (csize == PETSC_DECIDE) {
3773       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3774       if (mglobal == n) { /* square matrix */
3775         nlocal = m;
3776       } else {
3777         nlocal = n/size + ((n % size) > rank);
3778       }
3779     } else {
3780       nlocal = csize;
3781     }
3782     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3783     rstart = rend - nlocal;
3784     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3785 
3786     /* next, compute all the lengths */
3787     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3788     olens = dlens + m;
3789     for (i=0; i<m; i++) {
3790       jend = ii[i+1] - ii[i];
3791       olen = 0;
3792       dlen = 0;
3793       for (j=0; j<jend; j++) {
3794         if (*jj < rstart || *jj >= rend) olen++;
3795         else dlen++;
3796         jj++;
3797       }
3798       olens[i] = olen;
3799       dlens[i] = dlen;
3800     }
3801     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3802     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3803     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3804     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3805     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3806     ierr = PetscFree(dlens);CHKERRQ(ierr);
3807   } else {
3808     PetscInt ml,nl;
3809 
3810     M    = *newmat;
3811     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3812     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3813     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3814     /*
3815          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3816        rather than the slower MatSetValues().
3817     */
3818     M->was_assembled = PETSC_TRUE;
3819     M->assembled     = PETSC_FALSE;
3820   }
3821   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3822   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3823   ii   = aij->i;
3824   jj   = aij->j;
3825   aa   = aij->a;
3826   for (i=0; i<m; i++) {
3827     row   = rstart + i;
3828     nz    = ii[i+1] - ii[i];
3829     cwork = jj;     jj += nz;
3830     vwork = aa;     aa += nz;
3831     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3832   }
3833 
3834   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3836   *newmat = M;
3837 
3838   /* save submatrix used in processor for next request */
3839   if (call ==  MAT_INITIAL_MATRIX) {
3840     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3841     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3842   }
3843   PetscFunctionReturn(0);
3844 }
3845 
3846 #undef __FUNCT__
3847 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3848 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3849 {
3850   PetscInt       m,cstart, cend,j,nnz,i,d;
3851   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3852   const PetscInt *JJ;
3853   PetscScalar    *values;
3854   PetscErrorCode ierr;
3855 
3856   PetscFunctionBegin;
3857   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3858 
3859   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3860   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3861   m      = B->rmap->n;
3862   cstart = B->cmap->rstart;
3863   cend   = B->cmap->rend;
3864   rstart = B->rmap->rstart;
3865 
3866   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3867 
3868 #if defined(PETSC_USE_DEBUGGING)
3869   for (i=0; i<m; i++) {
3870     nnz = Ii[i+1]- Ii[i];
3871     JJ  = J + Ii[i];
3872     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3873     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3874     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3875   }
3876 #endif
3877 
3878   for (i=0; i<m; i++) {
3879     nnz     = Ii[i+1]- Ii[i];
3880     JJ      = J + Ii[i];
3881     nnz_max = PetscMax(nnz_max,nnz);
3882     d       = 0;
3883     for (j=0; j<nnz; j++) {
3884       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3885     }
3886     d_nnz[i] = d;
3887     o_nnz[i] = nnz - d;
3888   }
3889   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3890   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3891 
3892   if (v) values = (PetscScalar*)v;
3893   else {
3894     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3895     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3896   }
3897 
3898   for (i=0; i<m; i++) {
3899     ii   = i + rstart;
3900     nnz  = Ii[i+1]- Ii[i];
3901     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3902   }
3903   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3904   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905 
3906   if (!v) {
3907     ierr = PetscFree(values);CHKERRQ(ierr);
3908   }
3909   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3915 /*@
3916    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3917    (the default parallel PETSc format).
3918 
3919    Collective on MPI_Comm
3920 
3921    Input Parameters:
3922 +  B - the matrix
3923 .  i - the indices into j for the start of each local row (starts with zero)
3924 .  j - the column indices for each local row (starts with zero)
3925 -  v - optional values in the matrix
3926 
3927    Level: developer
3928 
3929    Notes:
3930        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3931      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3932      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3933 
3934        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3935 
3936        The format which is used for the sparse matrix input, is equivalent to a
3937     row-major ordering.. i.e for the following matrix, the input data expected is
3938     as shown:
3939 
3940         1 0 0
3941         2 0 3     P0
3942        -------
3943         4 5 6     P1
3944 
3945      Process0 [P0]: rows_owned=[0,1]
3946         i =  {0,1,3}  [size = nrow+1  = 2+1]
3947         j =  {0,0,2}  [size = nz = 6]
3948         v =  {1,2,3}  [size = nz = 6]
3949 
3950      Process1 [P1]: rows_owned=[2]
3951         i =  {0,3}    [size = nrow+1  = 1+1]
3952         j =  {0,1,2}  [size = nz = 6]
3953         v =  {4,5,6}  [size = nz = 6]
3954 
3955 .keywords: matrix, aij, compressed row, sparse, parallel
3956 
3957 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3958           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3959 @*/
3960 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3961 {
3962   PetscErrorCode ierr;
3963 
3964   PetscFunctionBegin;
3965   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3971 /*@C
3972    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3973    (the default parallel PETSc format).  For good matrix assembly performance
3974    the user should preallocate the matrix storage by setting the parameters
3975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3976    performance can be increased by more than a factor of 50.
3977 
3978    Collective on MPI_Comm
3979 
3980    Input Parameters:
3981 +  A - the matrix
3982 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3983            (same value is used for all local rows)
3984 .  d_nnz - array containing the number of nonzeros in the various rows of the
3985            DIAGONAL portion of the local submatrix (possibly different for each row)
3986            or NULL, if d_nz is used to specify the nonzero structure.
3987            The size of this array is equal to the number of local rows, i.e 'm'.
3988            For matrices that will be factored, you must leave room for (and set)
3989            the diagonal entry even if it is zero.
3990 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3991            submatrix (same value is used for all local rows).
3992 -  o_nnz - array containing the number of nonzeros in the various rows of the
3993            OFF-DIAGONAL portion of the local submatrix (possibly different for
3994            each row) or NULL, if o_nz is used to specify the nonzero
3995            structure. The size of this array is equal to the number
3996            of local rows, i.e 'm'.
3997 
3998    If the *_nnz parameter is given then the *_nz parameter is ignored
3999 
4000    The AIJ format (also called the Yale sparse matrix format or
4001    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4002    storage.  The stored row and column indices begin with zero.
4003    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4004 
4005    The parallel matrix is partitioned such that the first m0 rows belong to
4006    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4007    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4008 
4009    The DIAGONAL portion of the local submatrix of a processor can be defined
4010    as the submatrix which is obtained by extraction the part corresponding to
4011    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4012    first row that belongs to the processor, r2 is the last row belonging to
4013    the this processor, and c1-c2 is range of indices of the local part of a
4014    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4015    common case of a square matrix, the row and column ranges are the same and
4016    the DIAGONAL part is also square. The remaining portion of the local
4017    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4018 
4019    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4020 
4021    You can call MatGetInfo() to get information on how effective the preallocation was;
4022    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4023    You can also run with the option -info and look for messages with the string
4024    malloc in them to see if additional memory allocation was needed.
4025 
4026    Example usage:
4027 
4028    Consider the following 8x8 matrix with 34 non-zero values, that is
4029    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4030    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4031    as follows:
4032 
4033 .vb
4034             1  2  0  |  0  3  0  |  0  4
4035     Proc0   0  5  6  |  7  0  0  |  8  0
4036             9  0 10  | 11  0  0  | 12  0
4037     -------------------------------------
4038            13  0 14  | 15 16 17  |  0  0
4039     Proc1   0 18  0  | 19 20 21  |  0  0
4040             0  0  0  | 22 23  0  | 24  0
4041     -------------------------------------
4042     Proc2  25 26 27  |  0  0 28  | 29  0
4043            30  0  0  | 31 32 33  |  0 34
4044 .ve
4045 
4046    This can be represented as a collection of submatrices as:
4047 
4048 .vb
4049       A B C
4050       D E F
4051       G H I
4052 .ve
4053 
4054    Where the submatrices A,B,C are owned by proc0, D,E,F are
4055    owned by proc1, G,H,I are owned by proc2.
4056 
4057    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4058    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4059    The 'M','N' parameters are 8,8, and have the same values on all procs.
4060 
4061    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4062    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4063    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4064    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4065    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4066    matrix, ans [DF] as another SeqAIJ matrix.
4067 
4068    When d_nz, o_nz parameters are specified, d_nz storage elements are
4069    allocated for every row of the local diagonal submatrix, and o_nz
4070    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4071    One way to choose d_nz and o_nz is to use the max nonzerors per local
4072    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4073    In this case, the values of d_nz,o_nz are:
4074 .vb
4075      proc0 : dnz = 2, o_nz = 2
4076      proc1 : dnz = 3, o_nz = 2
4077      proc2 : dnz = 1, o_nz = 4
4078 .ve
4079    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4080    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4081    for proc3. i.e we are using 12+15+10=37 storage locations to store
4082    34 values.
4083 
4084    When d_nnz, o_nnz parameters are specified, the storage is specified
4085    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4086    In the above case the values for d_nnz,o_nnz are:
4087 .vb
4088      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4089      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4090      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4091 .ve
4092    Here the space allocated is sum of all the above values i.e 34, and
4093    hence pre-allocation is perfect.
4094 
4095    Level: intermediate
4096 
4097 .keywords: matrix, aij, compressed row, sparse, parallel
4098 
4099 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4100           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4101 @*/
4102 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4103 {
4104   PetscErrorCode ierr;
4105 
4106   PetscFunctionBegin;
4107   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4108   PetscValidType(B,1);
4109   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 #undef __FUNCT__
4114 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4115 /*@
4116      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4117          CSR format the local rows.
4118 
4119    Collective on MPI_Comm
4120 
4121    Input Parameters:
4122 +  comm - MPI communicator
4123 .  m - number of local rows (Cannot be PETSC_DECIDE)
4124 .  n - This value should be the same as the local size used in creating the
4125        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4126        calculated if N is given) For square matrices n is almost always m.
4127 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4128 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4129 .   i - row indices
4130 .   j - column indices
4131 -   a - matrix values
4132 
4133    Output Parameter:
4134 .   mat - the matrix
4135 
4136    Level: intermediate
4137 
4138    Notes:
4139        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4140      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4141      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4142 
4143        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4144 
4145        The format which is used for the sparse matrix input, is equivalent to a
4146     row-major ordering.. i.e for the following matrix, the input data expected is
4147     as shown:
4148 
4149         1 0 0
4150         2 0 3     P0
4151        -------
4152         4 5 6     P1
4153 
4154      Process0 [P0]: rows_owned=[0,1]
4155         i =  {0,1,3}  [size = nrow+1  = 2+1]
4156         j =  {0,0,2}  [size = nz = 6]
4157         v =  {1,2,3}  [size = nz = 6]
4158 
4159      Process1 [P1]: rows_owned=[2]
4160         i =  {0,3}    [size = nrow+1  = 1+1]
4161         j =  {0,1,2}  [size = nz = 6]
4162         v =  {4,5,6}  [size = nz = 6]
4163 
4164 .keywords: matrix, aij, compressed row, sparse, parallel
4165 
4166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4167           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4168 @*/
4169 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4170 {
4171   PetscErrorCode ierr;
4172 
4173   PetscFunctionBegin;
4174   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4175   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4176   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4177   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4178   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4179   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4180   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4181   PetscFunctionReturn(0);
4182 }
4183 
4184 #undef __FUNCT__
4185 #define __FUNCT__ "MatCreateAIJ"
4186 /*@C
4187    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4188    (the default parallel PETSc format).  For good matrix assembly performance
4189    the user should preallocate the matrix storage by setting the parameters
4190    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4191    performance can be increased by more than a factor of 50.
4192 
4193    Collective on MPI_Comm
4194 
4195    Input Parameters:
4196 +  comm - MPI communicator
4197 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4198            This value should be the same as the local size used in creating the
4199            y vector for the matrix-vector product y = Ax.
4200 .  n - This value should be the same as the local size used in creating the
4201        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4202        calculated if N is given) For square matrices n is almost always m.
4203 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4204 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4205 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4206            (same value is used for all local rows)
4207 .  d_nnz - array containing the number of nonzeros in the various rows of the
4208            DIAGONAL portion of the local submatrix (possibly different for each row)
4209            or NULL, if d_nz is used to specify the nonzero structure.
4210            The size of this array is equal to the number of local rows, i.e 'm'.
4211 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4212            submatrix (same value is used for all local rows).
4213 -  o_nnz - array containing the number of nonzeros in the various rows of the
4214            OFF-DIAGONAL portion of the local submatrix (possibly different for
4215            each row) or NULL, if o_nz is used to specify the nonzero
4216            structure. The size of this array is equal to the number
4217            of local rows, i.e 'm'.
4218 
4219    Output Parameter:
4220 .  A - the matrix
4221 
4222    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4223    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4224    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4225 
4226    Notes:
4227    If the *_nnz parameter is given then the *_nz parameter is ignored
4228 
4229    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4230    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4231    storage requirements for this matrix.
4232 
4233    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4234    processor than it must be used on all processors that share the object for
4235    that argument.
4236 
4237    The user MUST specify either the local or global matrix dimensions
4238    (possibly both).
4239 
4240    The parallel matrix is partitioned across processors such that the
4241    first m0 rows belong to process 0, the next m1 rows belong to
4242    process 1, the next m2 rows belong to process 2 etc.. where
4243    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4244    values corresponding to [m x N] submatrix.
4245 
4246    The columns are logically partitioned with the n0 columns belonging
4247    to 0th partition, the next n1 columns belonging to the next
4248    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4249 
4250    The DIAGONAL portion of the local submatrix on any given processor
4251    is the submatrix corresponding to the rows and columns m,n
4252    corresponding to the given processor. i.e diagonal matrix on
4253    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4254    etc. The remaining portion of the local submatrix [m x (N-n)]
4255    constitute the OFF-DIAGONAL portion. The example below better
4256    illustrates this concept.
4257 
4258    For a square global matrix we define each processor's diagonal portion
4259    to be its local rows and the corresponding columns (a square submatrix);
4260    each processor's off-diagonal portion encompasses the remainder of the
4261    local matrix (a rectangular submatrix).
4262 
4263    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4264 
4265    When calling this routine with a single process communicator, a matrix of
4266    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4267    type of communicator, use the construction mechanism:
4268      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4269 
4270    By default, this format uses inodes (identical nodes) when possible.
4271    We search for consecutive rows with the same nonzero structure, thereby
4272    reusing matrix information to achieve increased efficiency.
4273 
4274    Options Database Keys:
4275 +  -mat_no_inode  - Do not use inodes
4276 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4277 -  -mat_aij_oneindex - Internally use indexing starting at 1
4278         rather than 0.  Note that when calling MatSetValues(),
4279         the user still MUST index entries starting at 0!
4280 
4281 
4282    Example usage:
4283 
4284    Consider the following 8x8 matrix with 34 non-zero values, that is
4285    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4286    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4287    as follows:
4288 
4289 .vb
4290             1  2  0  |  0  3  0  |  0  4
4291     Proc0   0  5  6  |  7  0  0  |  8  0
4292             9  0 10  | 11  0  0  | 12  0
4293     -------------------------------------
4294            13  0 14  | 15 16 17  |  0  0
4295     Proc1   0 18  0  | 19 20 21  |  0  0
4296             0  0  0  | 22 23  0  | 24  0
4297     -------------------------------------
4298     Proc2  25 26 27  |  0  0 28  | 29  0
4299            30  0  0  | 31 32 33  |  0 34
4300 .ve
4301 
4302    This can be represented as a collection of submatrices as:
4303 
4304 .vb
4305       A B C
4306       D E F
4307       G H I
4308 .ve
4309 
4310    Where the submatrices A,B,C are owned by proc0, D,E,F are
4311    owned by proc1, G,H,I are owned by proc2.
4312 
4313    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4314    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4315    The 'M','N' parameters are 8,8, and have the same values on all procs.
4316 
4317    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4318    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4319    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4320    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4321    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4322    matrix, ans [DF] as another SeqAIJ matrix.
4323 
4324    When d_nz, o_nz parameters are specified, d_nz storage elements are
4325    allocated for every row of the local diagonal submatrix, and o_nz
4326    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4327    One way to choose d_nz and o_nz is to use the max nonzerors per local
4328    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4329    In this case, the values of d_nz,o_nz are:
4330 .vb
4331      proc0 : dnz = 2, o_nz = 2
4332      proc1 : dnz = 3, o_nz = 2
4333      proc2 : dnz = 1, o_nz = 4
4334 .ve
4335    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4336    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4337    for proc3. i.e we are using 12+15+10=37 storage locations to store
4338    34 values.
4339 
4340    When d_nnz, o_nnz parameters are specified, the storage is specified
4341    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4342    In the above case the values for d_nnz,o_nnz are:
4343 .vb
4344      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4345      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4346      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4347 .ve
4348    Here the space allocated is sum of all the above values i.e 34, and
4349    hence pre-allocation is perfect.
4350 
4351    Level: intermediate
4352 
4353 .keywords: matrix, aij, compressed row, sparse, parallel
4354 
4355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4356           MPIAIJ, MatCreateMPIAIJWithArrays()
4357 @*/
4358 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4359 {
4360   PetscErrorCode ierr;
4361   PetscMPIInt    size;
4362 
4363   PetscFunctionBegin;
4364   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4365   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4366   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4367   if (size > 1) {
4368     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4369     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4370   } else {
4371     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4372     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4373   }
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 #undef __FUNCT__
4378 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4379 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4380 {
4381   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4382 
4383   PetscFunctionBegin;
4384   *Ad     = a->A;
4385   *Ao     = a->B;
4386   *colmap = a->garray;
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 #undef __FUNCT__
4391 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4392 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4393 {
4394   PetscErrorCode ierr;
4395   PetscInt       i;
4396   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4397 
4398   PetscFunctionBegin;
4399   if (coloring->ctype == IS_COLORING_GLOBAL) {
4400     ISColoringValue *allcolors,*colors;
4401     ISColoring      ocoloring;
4402 
4403     /* set coloring for diagonal portion */
4404     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4405 
4406     /* set coloring for off-diagonal portion */
4407     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4408     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4409     for (i=0; i<a->B->cmap->n; i++) {
4410       colors[i] = allcolors[a->garray[i]];
4411     }
4412     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4413     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4414     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4415     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4416   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4417     ISColoringValue *colors;
4418     PetscInt        *larray;
4419     ISColoring      ocoloring;
4420 
4421     /* set coloring for diagonal portion */
4422     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4423     for (i=0; i<a->A->cmap->n; i++) {
4424       larray[i] = i + A->cmap->rstart;
4425     }
4426     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4427     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4428     for (i=0; i<a->A->cmap->n; i++) {
4429       colors[i] = coloring->colors[larray[i]];
4430     }
4431     ierr = PetscFree(larray);CHKERRQ(ierr);
4432     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4433     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4434     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4435 
4436     /* set coloring for off-diagonal portion */
4437     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4438     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4439     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4440     for (i=0; i<a->B->cmap->n; i++) {
4441       colors[i] = coloring->colors[larray[i]];
4442     }
4443     ierr = PetscFree(larray);CHKERRQ(ierr);
4444     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4445     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4446     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4447   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4448   PetscFunctionReturn(0);
4449 }
4450 
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4453 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4454 {
4455   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4456   PetscErrorCode ierr;
4457 
4458   PetscFunctionBegin;
4459   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4460   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4461   PetscFunctionReturn(0);
4462 }
4463 
4464 #undef __FUNCT__
4465 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4466 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4467 {
4468   PetscErrorCode ierr;
4469   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4470   PetscInt       *indx;
4471 
4472   PetscFunctionBegin;
4473   /* This routine will ONLY return MPIAIJ type matrix */
4474   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4475   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4476   if (n == PETSC_DECIDE) {
4477     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4478   }
4479   /* Check sum(n) = N */
4480   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4481   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4482 
4483   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4484   rstart -= m;
4485 
4486   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4487   for (i=0; i<m; i++) {
4488     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4489     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4490     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4491   }
4492 
4493   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4494   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4495   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4496   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4497   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4498   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4504 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4505 {
4506   PetscErrorCode ierr;
4507   PetscInt       m,N,i,rstart,nnz,Ii;
4508   PetscInt       *indx;
4509   PetscScalar    *values;
4510 
4511   PetscFunctionBegin;
4512   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4516     Ii   = i + rstart;
4517     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4518     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4519   }
4520   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 #undef __FUNCT__
4526 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4527 /*@
4528       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4529                  matrices from each processor
4530 
4531     Collective on MPI_Comm
4532 
4533    Input Parameters:
4534 +    comm - the communicators the parallel matrix will live on
4535 .    inmat - the input sequential matrices
4536 .    n - number of local columns (or PETSC_DECIDE)
4537 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4538 
4539    Output Parameter:
4540 .    outmat - the parallel matrix generated
4541 
4542     Level: advanced
4543 
4544    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4545 
4546 @*/
4547 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4548 {
4549   PetscErrorCode ierr;
4550 
4551   PetscFunctionBegin;
4552   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4553   if (scall == MAT_INITIAL_MATRIX) {
4554     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4555   }
4556   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4557   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 #undef __FUNCT__
4562 #define __FUNCT__ "MatFileSplit"
4563 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4564 {
4565   PetscErrorCode    ierr;
4566   PetscMPIInt       rank;
4567   PetscInt          m,N,i,rstart,nnz;
4568   size_t            len;
4569   const PetscInt    *indx;
4570   PetscViewer       out;
4571   char              *name;
4572   Mat               B;
4573   const PetscScalar *values;
4574 
4575   PetscFunctionBegin;
4576   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4577   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4578   /* Should this be the type of the diagonal block of A? */
4579   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4580   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4581   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4582   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4583   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4584   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4585   for (i=0; i<m; i++) {
4586     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4587     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4588     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4589   }
4590   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4591   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592 
4593   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4594   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4595   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4596   sprintf(name,"%s.%d",outfile,rank);
4597   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4598   ierr = PetscFree(name);CHKERRQ(ierr);
4599   ierr = MatView(B,out);CHKERRQ(ierr);
4600   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4601   ierr = MatDestroy(&B);CHKERRQ(ierr);
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4606 #undef __FUNCT__
4607 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4608 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4609 {
4610   PetscErrorCode      ierr;
4611   Mat_Merge_SeqsToMPI *merge;
4612   PetscContainer      container;
4613 
4614   PetscFunctionBegin;
4615   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4616   if (container) {
4617     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4618     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4619     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4620     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4621     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4622     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4623     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4624     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4625     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4626     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4627     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4628     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4629     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4630     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4631     ierr = PetscFree(merge);CHKERRQ(ierr);
4632     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4633   }
4634   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4635   PetscFunctionReturn(0);
4636 }
4637 
4638 #include <../src/mat/utils/freespace.h>
4639 #include <petscbt.h>
4640 
4641 #undef __FUNCT__
4642 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4643 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4644 {
4645   PetscErrorCode      ierr;
4646   MPI_Comm            comm;
4647   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4648   PetscMPIInt         size,rank,taga,*len_s;
4649   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4650   PetscInt            proc,m;
4651   PetscInt            **buf_ri,**buf_rj;
4652   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4653   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4654   MPI_Request         *s_waits,*r_waits;
4655   MPI_Status          *status;
4656   MatScalar           *aa=a->a;
4657   MatScalar           **abuf_r,*ba_i;
4658   Mat_Merge_SeqsToMPI *merge;
4659   PetscContainer      container;
4660 
4661   PetscFunctionBegin;
4662   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4663   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4664 
4665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4666   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4667 
4668   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4669   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4670 
4671   bi     = merge->bi;
4672   bj     = merge->bj;
4673   buf_ri = merge->buf_ri;
4674   buf_rj = merge->buf_rj;
4675 
4676   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4677   owners = merge->rowmap->range;
4678   len_s  = merge->len_s;
4679 
4680   /* send and recv matrix values */
4681   /*-----------------------------*/
4682   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4683   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4684 
4685   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4686   for (proc=0,k=0; proc<size; proc++) {
4687     if (!len_s[proc]) continue;
4688     i    = owners[proc];
4689     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4690     k++;
4691   }
4692 
4693   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4694   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4695   ierr = PetscFree(status);CHKERRQ(ierr);
4696 
4697   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4698   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4699 
4700   /* insert mat values of mpimat */
4701   /*----------------------------*/
4702   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4703   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4704 
4705   for (k=0; k<merge->nrecv; k++) {
4706     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4707     nrows       = *(buf_ri_k[k]);
4708     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4709     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4710   }
4711 
4712   /* set values of ba */
4713   m = merge->rowmap->n;
4714   for (i=0; i<m; i++) {
4715     arow = owners[rank] + i;
4716     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4717     bnzi = bi[i+1] - bi[i];
4718     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4719 
4720     /* add local non-zero vals of this proc's seqmat into ba */
4721     anzi   = ai[arow+1] - ai[arow];
4722     aj     = a->j + ai[arow];
4723     aa     = a->a + ai[arow];
4724     nextaj = 0;
4725     for (j=0; nextaj<anzi; j++) {
4726       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4727         ba_i[j] += aa[nextaj++];
4728       }
4729     }
4730 
4731     /* add received vals into ba */
4732     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4733       /* i-th row */
4734       if (i == *nextrow[k]) {
4735         anzi   = *(nextai[k]+1) - *nextai[k];
4736         aj     = buf_rj[k] + *(nextai[k]);
4737         aa     = abuf_r[k] + *(nextai[k]);
4738         nextaj = 0;
4739         for (j=0; nextaj<anzi; j++) {
4740           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4741             ba_i[j] += aa[nextaj++];
4742           }
4743         }
4744         nextrow[k]++; nextai[k]++;
4745       }
4746     }
4747     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4748   }
4749   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4750   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4751 
4752   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4753   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4754   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4755   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4756   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4757   PetscFunctionReturn(0);
4758 }
4759 
4760 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4761 
4762 #undef __FUNCT__
4763 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4764 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4765 {
4766   PetscErrorCode      ierr;
4767   Mat                 B_mpi;
4768   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4769   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4770   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4771   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4772   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4773   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4774   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4775   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4776   MPI_Status          *status;
4777   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4778   PetscBT             lnkbt;
4779   Mat_Merge_SeqsToMPI *merge;
4780   PetscContainer      container;
4781 
4782   PetscFunctionBegin;
4783   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4784 
4785   /* make sure it is a PETSc comm */
4786   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4788   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4789 
4790   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4791   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4792 
4793   /* determine row ownership */
4794   /*---------------------------------------------------------*/
4795   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4796   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4797   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4798   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4799   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4800   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4801   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4802 
4803   m      = merge->rowmap->n;
4804   owners = merge->rowmap->range;
4805 
4806   /* determine the number of messages to send, their lengths */
4807   /*---------------------------------------------------------*/
4808   len_s = merge->len_s;
4809 
4810   len          = 0; /* length of buf_si[] */
4811   merge->nsend = 0;
4812   for (proc=0; proc<size; proc++) {
4813     len_si[proc] = 0;
4814     if (proc == rank) {
4815       len_s[proc] = 0;
4816     } else {
4817       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4818       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4819     }
4820     if (len_s[proc]) {
4821       merge->nsend++;
4822       nrows = 0;
4823       for (i=owners[proc]; i<owners[proc+1]; i++) {
4824         if (ai[i+1] > ai[i]) nrows++;
4825       }
4826       len_si[proc] = 2*(nrows+1);
4827       len         += len_si[proc];
4828     }
4829   }
4830 
4831   /* determine the number and length of messages to receive for ij-structure */
4832   /*-------------------------------------------------------------------------*/
4833   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4834   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4835 
4836   /* post the Irecv of j-structure */
4837   /*-------------------------------*/
4838   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4839   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4840 
4841   /* post the Isend of j-structure */
4842   /*--------------------------------*/
4843   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4844 
4845   for (proc=0, k=0; proc<size; proc++) {
4846     if (!len_s[proc]) continue;
4847     i    = owners[proc];
4848     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4849     k++;
4850   }
4851 
4852   /* receives and sends of j-structure are complete */
4853   /*------------------------------------------------*/
4854   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4855   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4856 
4857   /* send and recv i-structure */
4858   /*---------------------------*/
4859   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4860   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4861 
4862   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4863   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4864   for (proc=0,k=0; proc<size; proc++) {
4865     if (!len_s[proc]) continue;
4866     /* form outgoing message for i-structure:
4867          buf_si[0]:                 nrows to be sent
4868                [1:nrows]:           row index (global)
4869                [nrows+1:2*nrows+1]: i-structure index
4870     */
4871     /*-------------------------------------------*/
4872     nrows       = len_si[proc]/2 - 1;
4873     buf_si_i    = buf_si + nrows+1;
4874     buf_si[0]   = nrows;
4875     buf_si_i[0] = 0;
4876     nrows       = 0;
4877     for (i=owners[proc]; i<owners[proc+1]; i++) {
4878       anzi = ai[i+1] - ai[i];
4879       if (anzi) {
4880         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4881         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4882         nrows++;
4883       }
4884     }
4885     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4886     k++;
4887     buf_si += len_si[proc];
4888   }
4889 
4890   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4891   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4892 
4893   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4894   for (i=0; i<merge->nrecv; i++) {
4895     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4896   }
4897 
4898   ierr = PetscFree(len_si);CHKERRQ(ierr);
4899   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4900   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4901   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4902   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4903   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4904   ierr = PetscFree(status);CHKERRQ(ierr);
4905 
4906   /* compute a local seq matrix in each processor */
4907   /*----------------------------------------------*/
4908   /* allocate bi array and free space for accumulating nonzero column info */
4909   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4910   bi[0] = 0;
4911 
4912   /* create and initialize a linked list */
4913   nlnk = N+1;
4914   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4915 
4916   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4917   len  = ai[owners[rank+1]] - ai[owners[rank]];
4918   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4919 
4920   current_space = free_space;
4921 
4922   /* determine symbolic info for each local row */
4923   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4924 
4925   for (k=0; k<merge->nrecv; k++) {
4926     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4927     nrows       = *buf_ri_k[k];
4928     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4929     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4930   }
4931 
4932   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4933   len  = 0;
4934   for (i=0; i<m; i++) {
4935     bnzi = 0;
4936     /* add local non-zero cols of this proc's seqmat into lnk */
4937     arow  = owners[rank] + i;
4938     anzi  = ai[arow+1] - ai[arow];
4939     aj    = a->j + ai[arow];
4940     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4941     bnzi += nlnk;
4942     /* add received col data into lnk */
4943     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4944       if (i == *nextrow[k]) { /* i-th row */
4945         anzi  = *(nextai[k]+1) - *nextai[k];
4946         aj    = buf_rj[k] + *nextai[k];
4947         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4948         bnzi += nlnk;
4949         nextrow[k]++; nextai[k]++;
4950       }
4951     }
4952     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4953 
4954     /* if free space is not available, make more free space */
4955     if (current_space->local_remaining<bnzi) {
4956       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4957       nspacedouble++;
4958     }
4959     /* copy data into free space, then initialize lnk */
4960     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4961     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4962 
4963     current_space->array           += bnzi;
4964     current_space->local_used      += bnzi;
4965     current_space->local_remaining -= bnzi;
4966 
4967     bi[i+1] = bi[i] + bnzi;
4968   }
4969 
4970   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4971 
4972   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4973   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4974   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4975 
4976   /* create symbolic parallel matrix B_mpi */
4977   /*---------------------------------------*/
4978   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4979   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4980   if (n==PETSC_DECIDE) {
4981     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4982   } else {
4983     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4984   }
4985   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4986   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4987   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4988   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4989   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4990 
4991   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4992   B_mpi->assembled    = PETSC_FALSE;
4993   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4994   merge->bi           = bi;
4995   merge->bj           = bj;
4996   merge->buf_ri       = buf_ri;
4997   merge->buf_rj       = buf_rj;
4998   merge->coi          = NULL;
4999   merge->coj          = NULL;
5000   merge->owners_co    = NULL;
5001 
5002   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5003 
5004   /* attach the supporting struct to B_mpi for reuse */
5005   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5006   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5007   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5008   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5009   *mpimat = B_mpi;
5010 
5011   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 #undef __FUNCT__
5016 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5017 /*@C
5018       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5019                  matrices from each processor
5020 
5021     Collective on MPI_Comm
5022 
5023    Input Parameters:
5024 +    comm - the communicators the parallel matrix will live on
5025 .    seqmat - the input sequential matrices
5026 .    m - number of local rows (or PETSC_DECIDE)
5027 .    n - number of local columns (or PETSC_DECIDE)
5028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 
5030    Output Parameter:
5031 .    mpimat - the parallel matrix generated
5032 
5033     Level: advanced
5034 
5035    Notes:
5036      The dimensions of the sequential matrix in each processor MUST be the same.
5037      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5038      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5039 @*/
5040 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5041 {
5042   PetscErrorCode ierr;
5043   PetscMPIInt    size;
5044 
5045   PetscFunctionBegin;
5046   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5047   if (size == 1) {
5048     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5049     if (scall == MAT_INITIAL_MATRIX) {
5050       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5051     } else {
5052       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5053     }
5054     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5055     PetscFunctionReturn(0);
5056   }
5057   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5058   if (scall == MAT_INITIAL_MATRIX) {
5059     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5060   }
5061   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5062   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5063   PetscFunctionReturn(0);
5064 }
5065 
5066 #undef __FUNCT__
5067 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5068 /*@
5069      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5070           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5071           with MatGetSize()
5072 
5073     Not Collective
5074 
5075    Input Parameters:
5076 +    A - the matrix
5077 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5078 
5079    Output Parameter:
5080 .    A_loc - the local sequential matrix generated
5081 
5082     Level: developer
5083 
5084 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5085 
5086 @*/
5087 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5088 {
5089   PetscErrorCode ierr;
5090   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5091   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5092   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5093   MatScalar      *aa=a->a,*ba=b->a,*cam;
5094   PetscScalar    *ca;
5095   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5096   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5097   PetscBool      match;
5098 
5099   PetscFunctionBegin;
5100   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5101   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5102   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5103   if (scall == MAT_INITIAL_MATRIX) {
5104     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5105     ci[0] = 0;
5106     for (i=0; i<am; i++) {
5107       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5108     }
5109     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5110     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5111     k    = 0;
5112     for (i=0; i<am; i++) {
5113       ncols_o = bi[i+1] - bi[i];
5114       ncols_d = ai[i+1] - ai[i];
5115       /* off-diagonal portion of A */
5116       for (jo=0; jo<ncols_o; jo++) {
5117         col = cmap[*bj];
5118         if (col >= cstart) break;
5119         cj[k]   = col; bj++;
5120         ca[k++] = *ba++;
5121       }
5122       /* diagonal portion of A */
5123       for (j=0; j<ncols_d; j++) {
5124         cj[k]   = cstart + *aj++;
5125         ca[k++] = *aa++;
5126       }
5127       /* off-diagonal portion of A */
5128       for (j=jo; j<ncols_o; j++) {
5129         cj[k]   = cmap[*bj++];
5130         ca[k++] = *ba++;
5131       }
5132     }
5133     /* put together the new matrix */
5134     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5135     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5136     /* Since these are PETSc arrays, change flags to free them as necessary. */
5137     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5138     mat->free_a  = PETSC_TRUE;
5139     mat->free_ij = PETSC_TRUE;
5140     mat->nonew   = 0;
5141   } else if (scall == MAT_REUSE_MATRIX) {
5142     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5143     ci = mat->i; cj = mat->j; cam = mat->a;
5144     for (i=0; i<am; i++) {
5145       /* off-diagonal portion of A */
5146       ncols_o = bi[i+1] - bi[i];
5147       for (jo=0; jo<ncols_o; jo++) {
5148         col = cmap[*bj];
5149         if (col >= cstart) break;
5150         *cam++ = *ba++; bj++;
5151       }
5152       /* diagonal portion of A */
5153       ncols_d = ai[i+1] - ai[i];
5154       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5155       /* off-diagonal portion of A */
5156       for (j=jo; j<ncols_o; j++) {
5157         *cam++ = *ba++; bj++;
5158       }
5159     }
5160   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5161   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5162   PetscFunctionReturn(0);
5163 }
5164 
5165 #undef __FUNCT__
5166 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5167 /*@C
5168      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5169 
5170     Not Collective
5171 
5172    Input Parameters:
5173 +    A - the matrix
5174 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5175 -    row, col - index sets of rows and columns to extract (or NULL)
5176 
5177    Output Parameter:
5178 .    A_loc - the local sequential matrix generated
5179 
5180     Level: developer
5181 
5182 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5183 
5184 @*/
5185 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5186 {
5187   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5188   PetscErrorCode ierr;
5189   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5190   IS             isrowa,iscola;
5191   Mat            *aloc;
5192   PetscBool      match;
5193 
5194   PetscFunctionBegin;
5195   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5196   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5197   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5198   if (!row) {
5199     start = A->rmap->rstart; end = A->rmap->rend;
5200     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5201   } else {
5202     isrowa = *row;
5203   }
5204   if (!col) {
5205     start = A->cmap->rstart;
5206     cmap  = a->garray;
5207     nzA   = a->A->cmap->n;
5208     nzB   = a->B->cmap->n;
5209     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5210     ncols = 0;
5211     for (i=0; i<nzB; i++) {
5212       if (cmap[i] < start) idx[ncols++] = cmap[i];
5213       else break;
5214     }
5215     imark = i;
5216     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5217     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5218     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5219   } else {
5220     iscola = *col;
5221   }
5222   if (scall != MAT_INITIAL_MATRIX) {
5223     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5224     aloc[0] = *A_loc;
5225   }
5226   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5227   *A_loc = aloc[0];
5228   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5229   if (!row) {
5230     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5231   }
5232   if (!col) {
5233     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5234   }
5235   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5236   PetscFunctionReturn(0);
5237 }
5238 
5239 #undef __FUNCT__
5240 #define __FUNCT__ "MatGetBrowsOfAcols"
5241 /*@C
5242     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5243 
5244     Collective on Mat
5245 
5246    Input Parameters:
5247 +    A,B - the matrices in mpiaij format
5248 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5249 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5250 
5251    Output Parameter:
5252 +    rowb, colb - index sets of rows and columns of B to extract
5253 -    B_seq - the sequential matrix generated
5254 
5255     Level: developer
5256 
5257 @*/
5258 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5259 {
5260   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5261   PetscErrorCode ierr;
5262   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5263   IS             isrowb,iscolb;
5264   Mat            *bseq=NULL;
5265 
5266   PetscFunctionBegin;
5267   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5268     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5269   }
5270   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5271 
5272   if (scall == MAT_INITIAL_MATRIX) {
5273     start = A->cmap->rstart;
5274     cmap  = a->garray;
5275     nzA   = a->A->cmap->n;
5276     nzB   = a->B->cmap->n;
5277     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5278     ncols = 0;
5279     for (i=0; i<nzB; i++) {  /* row < local row index */
5280       if (cmap[i] < start) idx[ncols++] = cmap[i];
5281       else break;
5282     }
5283     imark = i;
5284     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5285     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5286     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5287     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5288   } else {
5289     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5290     isrowb  = *rowb; iscolb = *colb;
5291     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5292     bseq[0] = *B_seq;
5293   }
5294   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5295   *B_seq = bseq[0];
5296   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5297   if (!rowb) {
5298     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5299   } else {
5300     *rowb = isrowb;
5301   }
5302   if (!colb) {
5303     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5304   } else {
5305     *colb = iscolb;
5306   }
5307   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5308   PetscFunctionReturn(0);
5309 }
5310 
5311 #undef __FUNCT__
5312 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5313 /*
5314     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5315     of the OFF-DIAGONAL portion of local A
5316 
5317     Collective on Mat
5318 
5319    Input Parameters:
5320 +    A,B - the matrices in mpiaij format
5321 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5322 
5323    Output Parameter:
5324 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5325 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5326 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5327 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5328 
5329     Level: developer
5330 
5331 */
5332 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5333 {
5334   VecScatter_MPI_General *gen_to,*gen_from;
5335   PetscErrorCode         ierr;
5336   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5337   Mat_SeqAIJ             *b_oth;
5338   VecScatter             ctx =a->Mvctx;
5339   MPI_Comm               comm;
5340   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5341   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5342   PetscScalar            *rvalues,*svalues;
5343   MatScalar              *b_otha,*bufa,*bufA;
5344   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5345   MPI_Request            *rwaits = NULL,*swaits = NULL;
5346   MPI_Status             *sstatus,rstatus;
5347   PetscMPIInt            jj;
5348   PetscInt               *cols,sbs,rbs;
5349   PetscScalar            *vals;
5350 
5351   PetscFunctionBegin;
5352   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5353   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5354     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5355   }
5356   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5357   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5358 
5359   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5360   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5361   rvalues  = gen_from->values; /* holds the length of receiving row */
5362   svalues  = gen_to->values;   /* holds the length of sending row */
5363   nrecvs   = gen_from->n;
5364   nsends   = gen_to->n;
5365 
5366   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5367   srow    = gen_to->indices;    /* local row index to be sent */
5368   sstarts = gen_to->starts;
5369   sprocs  = gen_to->procs;
5370   sstatus = gen_to->sstatus;
5371   sbs     = gen_to->bs;
5372   rstarts = gen_from->starts;
5373   rprocs  = gen_from->procs;
5374   rbs     = gen_from->bs;
5375 
5376   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5377   if (scall == MAT_INITIAL_MATRIX) {
5378     /* i-array */
5379     /*---------*/
5380     /*  post receives */
5381     for (i=0; i<nrecvs; i++) {
5382       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5383       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5384       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5385     }
5386 
5387     /* pack the outgoing message */
5388     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5389 
5390     sstartsj[0] = 0;
5391     rstartsj[0] = 0;
5392     len         = 0; /* total length of j or a array to be sent */
5393     k           = 0;
5394     for (i=0; i<nsends; i++) {
5395       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5396       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5397       for (j=0; j<nrows; j++) {
5398         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5399         for (l=0; l<sbs; l++) {
5400           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5401 
5402           rowlen[j*sbs+l] = ncols;
5403 
5404           len += ncols;
5405           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5406         }
5407         k++;
5408       }
5409       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5410 
5411       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5412     }
5413     /* recvs and sends of i-array are completed */
5414     i = nrecvs;
5415     while (i--) {
5416       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5417     }
5418     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5419 
5420     /* allocate buffers for sending j and a arrays */
5421     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5422     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5423 
5424     /* create i-array of B_oth */
5425     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5426 
5427     b_othi[0] = 0;
5428     len       = 0; /* total length of j or a array to be received */
5429     k         = 0;
5430     for (i=0; i<nrecvs; i++) {
5431       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5432       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5433       for (j=0; j<nrows; j++) {
5434         b_othi[k+1] = b_othi[k] + rowlen[j];
5435         len        += rowlen[j]; k++;
5436       }
5437       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5438     }
5439 
5440     /* allocate space for j and a arrrays of B_oth */
5441     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5442     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5443 
5444     /* j-array */
5445     /*---------*/
5446     /*  post receives of j-array */
5447     for (i=0; i<nrecvs; i++) {
5448       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5449       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5450     }
5451 
5452     /* pack the outgoing message j-array */
5453     k = 0;
5454     for (i=0; i<nsends; i++) {
5455       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5456       bufJ  = bufj+sstartsj[i];
5457       for (j=0; j<nrows; j++) {
5458         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5459         for (ll=0; ll<sbs; ll++) {
5460           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5461           for (l=0; l<ncols; l++) {
5462             *bufJ++ = cols[l];
5463           }
5464           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5465         }
5466       }
5467       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5468     }
5469 
5470     /* recvs and sends of j-array are completed */
5471     i = nrecvs;
5472     while (i--) {
5473       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5474     }
5475     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5476   } else if (scall == MAT_REUSE_MATRIX) {
5477     sstartsj = *startsj_s;
5478     rstartsj = *startsj_r;
5479     bufa     = *bufa_ptr;
5480     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5481     b_otha   = b_oth->a;
5482   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5483 
5484   /* a-array */
5485   /*---------*/
5486   /*  post receives of a-array */
5487   for (i=0; i<nrecvs; i++) {
5488     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5489     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5490   }
5491 
5492   /* pack the outgoing message a-array */
5493   k = 0;
5494   for (i=0; i<nsends; i++) {
5495     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5496     bufA  = bufa+sstartsj[i];
5497     for (j=0; j<nrows; j++) {
5498       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5499       for (ll=0; ll<sbs; ll++) {
5500         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5501         for (l=0; l<ncols; l++) {
5502           *bufA++ = vals[l];
5503         }
5504         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5505       }
5506     }
5507     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5508   }
5509   /* recvs and sends of a-array are completed */
5510   i = nrecvs;
5511   while (i--) {
5512     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5513   }
5514   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5515   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5516 
5517   if (scall == MAT_INITIAL_MATRIX) {
5518     /* put together the new matrix */
5519     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5520 
5521     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5522     /* Since these are PETSc arrays, change flags to free them as necessary. */
5523     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5524     b_oth->free_a  = PETSC_TRUE;
5525     b_oth->free_ij = PETSC_TRUE;
5526     b_oth->nonew   = 0;
5527 
5528     ierr = PetscFree(bufj);CHKERRQ(ierr);
5529     if (!startsj_s || !bufa_ptr) {
5530       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5531       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5532     } else {
5533       *startsj_s = sstartsj;
5534       *startsj_r = rstartsj;
5535       *bufa_ptr  = bufa;
5536     }
5537   }
5538   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5539   PetscFunctionReturn(0);
5540 }
5541 
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatGetCommunicationStructs"
5544 /*@C
5545   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5546 
5547   Not Collective
5548 
5549   Input Parameters:
5550 . A - The matrix in mpiaij format
5551 
5552   Output Parameter:
5553 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5554 . colmap - A map from global column index to local index into lvec
5555 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5556 
5557   Level: developer
5558 
5559 @*/
5560 #if defined(PETSC_USE_CTABLE)
5561 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5562 #else
5563 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5564 #endif
5565 {
5566   Mat_MPIAIJ *a;
5567 
5568   PetscFunctionBegin;
5569   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5570   PetscValidPointer(lvec, 2);
5571   PetscValidPointer(colmap, 3);
5572   PetscValidPointer(multScatter, 4);
5573   a = (Mat_MPIAIJ*) A->data;
5574   if (lvec) *lvec = a->lvec;
5575   if (colmap) *colmap = a->colmap;
5576   if (multScatter) *multScatter = a->Mvctx;
5577   PetscFunctionReturn(0);
5578 }
5579 
5580 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5581 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5582 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5583 
5584 #undef __FUNCT__
5585 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5586 /*
5587     Computes (B'*A')' since computing B*A directly is untenable
5588 
5589                n                       p                          p
5590         (              )       (              )         (                  )
5591       m (      A       )  *  n (       B      )   =   m (         C        )
5592         (              )       (              )         (                  )
5593 
5594 */
5595 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5596 {
5597   PetscErrorCode ierr;
5598   Mat            At,Bt,Ct;
5599 
5600   PetscFunctionBegin;
5601   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5602   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5603   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5604   ierr = MatDestroy(&At);CHKERRQ(ierr);
5605   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5606   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5607   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
5610 
5611 #undef __FUNCT__
5612 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5613 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5614 {
5615   PetscErrorCode ierr;
5616   PetscInt       m=A->rmap->n,n=B->cmap->n;
5617   Mat            Cmat;
5618 
5619   PetscFunctionBegin;
5620   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5621   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5622   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5623   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5624   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5625   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5626   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628 
5629   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5630 
5631   *C = Cmat;
5632   PetscFunctionReturn(0);
5633 }
5634 
5635 /* ----------------------------------------------------------------*/
5636 #undef __FUNCT__
5637 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5638 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5639 {
5640   PetscErrorCode ierr;
5641 
5642   PetscFunctionBegin;
5643   if (scall == MAT_INITIAL_MATRIX) {
5644     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5645     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5646     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5647   }
5648   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5649   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5650   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5651   PetscFunctionReturn(0);
5652 }
5653 
5654 #if defined(PETSC_HAVE_MUMPS)
5655 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5656 #endif
5657 #if defined(PETSC_HAVE_PASTIX)
5658 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5659 #endif
5660 #if defined(PETSC_HAVE_SUPERLU_DIST)
5661 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5662 #endif
5663 #if defined(PETSC_HAVE_CLIQUE)
5664 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5665 #endif
5666 
5667 /*MC
5668    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5669 
5670    Options Database Keys:
5671 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5672 
5673   Level: beginner
5674 
5675 .seealso: MatCreateAIJ()
5676 M*/
5677 
5678 #undef __FUNCT__
5679 #define __FUNCT__ "MatCreate_MPIAIJ"
5680 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5681 {
5682   Mat_MPIAIJ     *b;
5683   PetscErrorCode ierr;
5684   PetscMPIInt    size;
5685 
5686   PetscFunctionBegin;
5687   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5688 
5689   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5690   B->data       = (void*)b;
5691   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5692   B->assembled  = PETSC_FALSE;
5693   B->insertmode = NOT_SET_VALUES;
5694   b->size       = size;
5695 
5696   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5697 
5698   /* build cache for off array entries formed */
5699   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5700 
5701   b->donotstash  = PETSC_FALSE;
5702   b->colmap      = 0;
5703   b->garray      = 0;
5704   b->roworiented = PETSC_TRUE;
5705 
5706   /* stuff used for matrix vector multiply */
5707   b->lvec  = NULL;
5708   b->Mvctx = NULL;
5709 
5710   /* stuff for MatGetRow() */
5711   b->rowindices   = 0;
5712   b->rowvalues    = 0;
5713   b->getrowactive = PETSC_FALSE;
5714 
5715   /* flexible pointer used in CUSP/CUSPARSE classes */
5716   b->spptr = NULL;
5717 
5718 #if defined(PETSC_HAVE_MUMPS)
5719   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5720 #endif
5721 #if defined(PETSC_HAVE_PASTIX)
5722   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5723 #endif
5724 #if defined(PETSC_HAVE_SUPERLU_DIST)
5725   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5726 #endif
5727 #if defined(PETSC_HAVE_CLIQUE)
5728   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5729 #endif
5730   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5731   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5732   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5734   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5735   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5737   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5743   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5744   PetscFunctionReturn(0);
5745 }
5746 
5747 #undef __FUNCT__
5748 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5749 /*@
5750      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5751          and "off-diagonal" part of the matrix in CSR format.
5752 
5753    Collective on MPI_Comm
5754 
5755    Input Parameters:
5756 +  comm - MPI communicator
5757 .  m - number of local rows (Cannot be PETSC_DECIDE)
5758 .  n - This value should be the same as the local size used in creating the
5759        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5760        calculated if N is given) For square matrices n is almost always m.
5761 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5762 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5763 .   i - row indices for "diagonal" portion of matrix
5764 .   j - column indices
5765 .   a - matrix values
5766 .   oi - row indices for "off-diagonal" portion of matrix
5767 .   oj - column indices
5768 -   oa - matrix values
5769 
5770    Output Parameter:
5771 .   mat - the matrix
5772 
5773    Level: advanced
5774 
5775    Notes:
5776        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5777        must free the arrays once the matrix has been destroyed and not before.
5778 
5779        The i and j indices are 0 based
5780 
5781        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5782 
5783        This sets local rows and cannot be used to set off-processor values.
5784 
5785        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5786        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5787        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5788        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5789        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5790        communication if it is known that only local entries will be set.
5791 
5792 .keywords: matrix, aij, compressed row, sparse, parallel
5793 
5794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5795           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5796 @*/
5797 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5798 {
5799   PetscErrorCode ierr;
5800   Mat_MPIAIJ     *maij;
5801 
5802   PetscFunctionBegin;
5803   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5804   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5805   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5806   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5807   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5808   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5809   maij = (Mat_MPIAIJ*) (*mat)->data;
5810 
5811   (*mat)->preallocated = PETSC_TRUE;
5812 
5813   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5814   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5815 
5816   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5817   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5818 
5819   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5820   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5821   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5822   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5823 
5824   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5825   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5826   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5827   PetscFunctionReturn(0);
5828 }
5829 
5830 /*
5831     Special version for direct calls from Fortran
5832 */
5833 #include <petsc-private/fortranimpl.h>
5834 
5835 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5836 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5837 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5838 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5839 #endif
5840 
5841 /* Change these macros so can be used in void function */
5842 #undef CHKERRQ
5843 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5844 #undef SETERRQ2
5845 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5846 #undef SETERRQ3
5847 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5848 #undef SETERRQ
5849 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5850 
5851 #undef __FUNCT__
5852 #define __FUNCT__ "matsetvaluesmpiaij_"
5853 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5854 {
5855   Mat            mat  = *mmat;
5856   PetscInt       m    = *mm, n = *mn;
5857   InsertMode     addv = *maddv;
5858   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5859   PetscScalar    value;
5860   PetscErrorCode ierr;
5861 
5862   MatCheckPreallocated(mat,1);
5863   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5864 
5865 #if defined(PETSC_USE_DEBUG)
5866   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5867 #endif
5868   {
5869     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5870     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5871     PetscBool roworiented = aij->roworiented;
5872 
5873     /* Some Variables required in the macro */
5874     Mat        A                 = aij->A;
5875     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5876     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5877     MatScalar  *aa               = a->a;
5878     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5879     Mat        B                 = aij->B;
5880     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5881     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5882     MatScalar  *ba               = b->a;
5883 
5884     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5885     PetscInt  nonew = a->nonew;
5886     MatScalar *ap1,*ap2;
5887 
5888     PetscFunctionBegin;
5889     for (i=0; i<m; i++) {
5890       if (im[i] < 0) continue;
5891 #if defined(PETSC_USE_DEBUG)
5892       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5893 #endif
5894       if (im[i] >= rstart && im[i] < rend) {
5895         row      = im[i] - rstart;
5896         lastcol1 = -1;
5897         rp1      = aj + ai[row];
5898         ap1      = aa + ai[row];
5899         rmax1    = aimax[row];
5900         nrow1    = ailen[row];
5901         low1     = 0;
5902         high1    = nrow1;
5903         lastcol2 = -1;
5904         rp2      = bj + bi[row];
5905         ap2      = ba + bi[row];
5906         rmax2    = bimax[row];
5907         nrow2    = bilen[row];
5908         low2     = 0;
5909         high2    = nrow2;
5910 
5911         for (j=0; j<n; j++) {
5912           if (roworiented) value = v[i*n+j];
5913           else value = v[i+j*m];
5914           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5915           if (in[j] >= cstart && in[j] < cend) {
5916             col = in[j] - cstart;
5917             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5918           } else if (in[j] < 0) continue;
5919 #if defined(PETSC_USE_DEBUG)
5920           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5921 #endif
5922           else {
5923             if (mat->was_assembled) {
5924               if (!aij->colmap) {
5925                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5926               }
5927 #if defined(PETSC_USE_CTABLE)
5928               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5929               col--;
5930 #else
5931               col = aij->colmap[in[j]] - 1;
5932 #endif
5933               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5934                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5935                 col  =  in[j];
5936                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5937                 B     = aij->B;
5938                 b     = (Mat_SeqAIJ*)B->data;
5939                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5940                 rp2   = bj + bi[row];
5941                 ap2   = ba + bi[row];
5942                 rmax2 = bimax[row];
5943                 nrow2 = bilen[row];
5944                 low2  = 0;
5945                 high2 = nrow2;
5946                 bm    = aij->B->rmap->n;
5947                 ba    = b->a;
5948               }
5949             } else col = in[j];
5950             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5951           }
5952         }
5953       } else if (!aij->donotstash) {
5954         if (roworiented) {
5955           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5956         } else {
5957           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5958         }
5959       }
5960     }
5961   }
5962   PetscFunctionReturnVoid();
5963 }
5964 
5965