xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 90d9b88fc62cb359b45f67bdd8dab0c37d408c89)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537 
2538     if (redund->psubcomm) {
2539       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2540     }
2541 
2542     ierr = redund->Destroy(A);CHKERRQ(ierr);
2543     ierr = PetscFree(redund);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2551 {
2552   PetscMPIInt    rank,size;
2553   MPI_Comm       comm,subcomm=psubcomm->comm;
2554   PetscErrorCode ierr;
2555   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2556   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2557   PetscInt       *rowrange = mat->rmap->range;
2558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2559   Mat            A = aij->A,B=aij->B,C=*matredundant;
2560   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2561   PetscScalar    *sbuf_a;
2562   PetscInt       nzlocal=a->nz+b->nz;
2563   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2564   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2565   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2566   MatScalar      *aworkA,*aworkB;
2567   PetscScalar    *vals;
2568   PetscMPIInt    tag1,tag2,tag3,imdex;
2569   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2570   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2571   MPI_Status     recv_status,*send_status;
2572   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2573   PetscInt       **rbuf_j=NULL;
2574   PetscScalar    **rbuf_a=NULL;
2575   Mat_Redundant  *redund =NULL;
2576 
2577   PetscFunctionBegin;
2578   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2579   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2581   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2582   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2583 
2584   if (reuse == MAT_REUSE_MATRIX) {
2585     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2586     if (subsize == 1) {
2587       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2588       redund = c->redundant;
2589     } else {
2590       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2591       redund = c->redundant;
2592     }
2593     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2594 
2595     nsends    = redund->nsends;
2596     nrecvs    = redund->nrecvs;
2597     send_rank = redund->send_rank;
2598     recv_rank = redund->recv_rank;
2599     sbuf_nz   = redund->sbuf_nz;
2600     rbuf_nz   = redund->rbuf_nz;
2601     sbuf_j    = redund->sbuf_j;
2602     sbuf_a    = redund->sbuf_a;
2603     rbuf_j    = redund->rbuf_j;
2604     rbuf_a    = redund->rbuf_a;
2605   }
2606 
2607   if (reuse == MAT_INITIAL_MATRIX) {
2608     PetscInt    nleftover,np_subcomm;
2609 
2610     /* get the destination processors' id send_rank, nsends and nrecvs */
2611     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2612 
2613     np_subcomm = size/nsubcomm;
2614     nleftover  = size - nsubcomm*np_subcomm;
2615 
2616     nsends = 0; nrecvs = 0;
2617     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2618       /* -------------------------------------------*/
2619       for (i=0; i<size; i++) {
2620         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2621           send_rank[nsends]   = i; nsends++;
2622           recv_rank[nrecvs++] = i;
2623           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2624         }
2625       }
2626       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2627         i = size-nleftover-1;
2628         j = 0;
2629         while (j < nsubcomm - nleftover) {
2630           send_rank[nsends++] = i;
2631           i--; j++;
2632           /* printf("[%d] send to [%d]\n",rank,i); */
2633         }
2634       }
2635 
2636       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2637         for (i=0; i<nleftover; i++) {
2638           recv_rank[nrecvs++] = size-nleftover+i;
2639           /* printf("[%d] recv from [%d]\n",rank,i); */
2640         }
2641       }
2642     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2643       /* --------------------------------------------------*/
2644       /* ---------- new imples: use MatGetSubMatrices() ------------*/
2645       PetscBool flg=PETSC_FALSE;
2646       ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2647       if (flg) {
2648         Mat        *matseq;
2649         IS         isrow,iscol;
2650         PetscInt   mloc_sub,rstart,rend;
2651 
2652         /* create redundant matrix */
2653         ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2654         ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2655         ierr = MatSetUp(C);CHKERRQ(ierr);
2656         ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2657         ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2658         ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2659 
2660         ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
2661         mloc_sub = rend - rstart;
2662 
2663         printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub);
2664         ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2665         ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2666 
2667         ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&matseq);CHKERRQ(ierr);
2668         if (rank==1) {
2669           printf("[%d] matsub:\n",rank);
2670           ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2671         }
2672 
2673         /* Create redundant matrix by concatenating local sequential
2674                  matrices from processors in this subcomm */
2675         ierr = MatDestroy(&C);CHKERRQ(ierr);
2676 
2677         ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
2678         *matredundant = C;
2679 
2680         if (nsubcomm == 1) {
2681           if (!rank) printf( "C\n");
2682           ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2683         }
2684 
2685         ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr);
2686         ierr = PetscFree(matseq);CHKERRQ(ierr);
2687         ierr = ISDestroy(&isrow);CHKERRQ(ierr);
2688         ierr = ISDestroy(&iscol);CHKERRQ(ierr);
2689         //ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2690         //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No done yet");
2691         PetscFunctionReturn(0);
2692       }
2693 
2694       /*---------------------------------- */
2695       PetscInt color,subcommstart;
2696       subcommstart=0;
2697       for (color=0; color<nsubcomm; color++) {
2698         if (psubcomm->color != color) {
2699           for (i=0; i<psubcomm->subsize[color]; i++) {
2700             if (subrank == i) { /* my_subrank == other's subrank */
2701               send_rank[nsends++] = subcommstart+i;
2702               recv_rank[nrecvs++] = subcommstart+i;
2703               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2704             }
2705           }
2706         }
2707         subcommstart += psubcomm->subsize[color];
2708       }
2709       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2710         subcommstart=0;
2711         for (color=0; color<nsubcomm; color++) {
2712           subcommstart += psubcomm->subsize[color];
2713           if (psubcomm->color == color) continue;
2714           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2715             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2716             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2717           }
2718         }
2719       }
2720 
2721       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2722         subcommstart=0;
2723         for (color=0; color<nsubcomm; color++) {
2724           subcommstart += psubcomm->subsize[color];
2725           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2726             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2727             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2728           }
2729         }
2730       }
2731     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2732 
2733     /* allocate sbuf_j, sbuf_a */
2734     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2735     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2736     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2737     /*
2738     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2739     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2740      */
2741   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2742 
2743   /* copy mat's local entries into the buffers */
2744   if (reuse == MAT_INITIAL_MATRIX) {
2745     rownz_max = 0;
2746     rptr      = sbuf_j;
2747     cols      = sbuf_j + rend-rstart + 1;
2748     vals      = sbuf_a;
2749     rptr[0]   = 0;
2750     for (i=0; i<rend-rstart; i++) {
2751       row    = i + rstart;
2752       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2753       ncols  = nzA + nzB;
2754       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2755       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2756       /* load the column indices for this row into cols */
2757       lwrite = 0;
2758       for (l=0; l<nzB; l++) {
2759         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2760           vals[lwrite]   = aworkB[l];
2761           cols[lwrite++] = ctmp;
2762         }
2763       }
2764       for (l=0; l<nzA; l++) {
2765         vals[lwrite]   = aworkA[l];
2766         cols[lwrite++] = cstart + cworkA[l];
2767       }
2768       for (l=0; l<nzB; l++) {
2769         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2770           vals[lwrite]   = aworkB[l];
2771           cols[lwrite++] = ctmp;
2772         }
2773       }
2774       vals     += ncols;
2775       cols     += ncols;
2776       rptr[i+1] = rptr[i] + ncols;
2777       if (rownz_max < ncols) rownz_max = ncols;
2778     }
2779     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2780   } else { /* only copy matrix values into sbuf_a */
2781     rptr    = sbuf_j;
2782     vals    = sbuf_a;
2783     rptr[0] = 0;
2784     for (i=0; i<rend-rstart; i++) {
2785       row    = i + rstart;
2786       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2787       ncols  = nzA + nzB;
2788       cworkB = b->j + b->i[i];
2789       aworkA = a->a + a->i[i];
2790       aworkB = b->a + b->i[i];
2791       lwrite = 0;
2792       for (l=0; l<nzB; l++) {
2793         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2794       }
2795       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2796       for (l=0; l<nzB; l++) {
2797         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2798       }
2799       vals     += ncols;
2800       rptr[i+1] = rptr[i] + ncols;
2801     }
2802   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2803 
2804   /* send nzlocal to others, and recv other's nzlocal */
2805   /*--------------------------------------------------*/
2806   if (reuse == MAT_INITIAL_MATRIX) {
2807     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2808 
2809     s_waits2 = s_waits3 + nsends;
2810     s_waits1 = s_waits2 + nsends;
2811     r_waits1 = s_waits1 + nsends;
2812     r_waits2 = r_waits1 + nrecvs;
2813     r_waits3 = r_waits2 + nrecvs;
2814   } else {
2815     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2816 
2817     r_waits3 = s_waits3 + nsends;
2818   }
2819 
2820   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2821   if (reuse == MAT_INITIAL_MATRIX) {
2822     /* get new tags to keep the communication clean */
2823     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2824     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2825     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2826 
2827     /* post receives of other's nzlocal */
2828     for (i=0; i<nrecvs; i++) {
2829       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2830     }
2831     /* send nzlocal to others */
2832     for (i=0; i<nsends; i++) {
2833       sbuf_nz[i] = nzlocal;
2834       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2835     }
2836     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2837     count = nrecvs;
2838     while (count) {
2839       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2840 
2841       recv_rank[imdex] = recv_status.MPI_SOURCE;
2842       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2843       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2844 
2845       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2846 
2847       rbuf_nz[imdex] += i + 2;
2848 
2849       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2850       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2851       count--;
2852     }
2853     /* wait on sends of nzlocal */
2854     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2855     /* send mat->i,j to others, and recv from other's */
2856     /*------------------------------------------------*/
2857     for (i=0; i<nsends; i++) {
2858       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2859       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2860     }
2861     /* wait on receives of mat->i,j */
2862     /*------------------------------*/
2863     count = nrecvs;
2864     while (count) {
2865       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2866       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2867       count--;
2868     }
2869     /* wait on sends of mat->i,j */
2870     /*---------------------------*/
2871     if (nsends) {
2872       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2873     }
2874   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2875 
2876   /* post receives, send and receive mat->a */
2877   /*----------------------------------------*/
2878   for (imdex=0; imdex<nrecvs; imdex++) {
2879     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2880   }
2881   for (i=0; i<nsends; i++) {
2882     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2883   }
2884   count = nrecvs;
2885   while (count) {
2886     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2887     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2888     count--;
2889   }
2890   if (nsends) {
2891     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2892   }
2893 
2894   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2895 
2896   /* create redundant matrix */
2897   /*-------------------------*/
2898   if (reuse == MAT_INITIAL_MATRIX) {
2899     const PetscInt *range;
2900     PetscInt       rstart_sub,rend_sub,mloc_sub;
2901 
2902     /* compute rownz_max for preallocation */
2903     for (imdex=0; imdex<nrecvs; imdex++) {
2904       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2905       rptr = rbuf_j[imdex];
2906       for (i=0; i<j; i++) {
2907         ncols = rptr[i+1] - rptr[i];
2908         if (rownz_max < ncols) rownz_max = ncols;
2909       }
2910     }
2911 
2912     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2913 
2914     /* get local size of redundant matrix
2915        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2916     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2917     rstart_sub = range[nsubcomm*subrank];
2918     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2919       rend_sub = range[nsubcomm*(subrank+1)];
2920     } else {
2921       rend_sub = mat->rmap->N;
2922     }
2923     mloc_sub = rend_sub - rstart_sub;
2924 
2925     if (M == N) {
2926       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2927     } else { /* non-square matrix */
2928       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2929     }
2930     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2931     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2932     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2933     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2934   } else {
2935     C = *matredundant;
2936   }
2937 
2938   /* insert local matrix entries */
2939   rptr = sbuf_j;
2940   cols = sbuf_j + rend-rstart + 1;
2941   vals = sbuf_a;
2942   for (i=0; i<rend-rstart; i++) {
2943     row   = i + rstart;
2944     ncols = rptr[i+1] - rptr[i];
2945     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2946     vals += ncols;
2947     cols += ncols;
2948   }
2949   /* insert received matrix entries */
2950   for (imdex=0; imdex<nrecvs; imdex++) {
2951     rstart = rowrange[recv_rank[imdex]];
2952     rend   = rowrange[recv_rank[imdex]+1];
2953     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2954     rptr   = rbuf_j[imdex];
2955     cols   = rbuf_j[imdex] + rend-rstart + 1;
2956     vals   = rbuf_a[imdex];
2957     for (i=0; i<rend-rstart; i++) {
2958       row   = i + rstart;
2959       ncols = rptr[i+1] - rptr[i];
2960       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2961       vals += ncols;
2962       cols += ncols;
2963     }
2964   }
2965   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967 
2968   if (reuse == MAT_INITIAL_MATRIX) {
2969     *matredundant = C;
2970 
2971     /* create a supporting struct and attach it to C for reuse */
2972     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2973     if (subsize == 1) {
2974       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2975       c->redundant = redund;
2976     } else {
2977       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2978       c->redundant = redund;
2979     }
2980 
2981     redund->nzlocal   = nzlocal;
2982     redund->nsends    = nsends;
2983     redund->nrecvs    = nrecvs;
2984     redund->send_rank = send_rank;
2985     redund->recv_rank = recv_rank;
2986     redund->sbuf_nz   = sbuf_nz;
2987     redund->rbuf_nz   = rbuf_nz;
2988     redund->sbuf_j    = sbuf_j;
2989     redund->sbuf_a    = sbuf_a;
2990     redund->rbuf_j    = rbuf_j;
2991     redund->rbuf_a    = rbuf_a;
2992     redund->psubcomm  = NULL;
2993 
2994     redund->Destroy = C->ops->destroy;
2995     C->ops->destroy = MatDestroy_MatRedundant;
2996   }
2997   PetscFunctionReturn(0);
2998 }
2999 
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
3002 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
3003 {
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
3008     MPI_Comm       comm;
3009     PetscSubcomm   psubcomm;
3010     PetscMPIInt    size,subsize;
3011 
3012     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3013     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3014     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
3015     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
3016     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
3017     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
3018 
3019     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3020 
3021     /* free psubcomm in MatDestroy_MatRedundant() */
3022     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3023     if (subsize == 1) {
3024       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3025       c->redundant->psubcomm = psubcomm;
3026     } else {
3027       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3028       c->redundant->psubcomm = psubcomm ;
3029     }
3030   } else {
3031     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
3032   }
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3038 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3039 {
3040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3041   PetscErrorCode ierr;
3042   PetscInt       i,*idxb = 0;
3043   PetscScalar    *va,*vb;
3044   Vec            vtmp;
3045 
3046   PetscFunctionBegin;
3047   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3048   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3049   if (idx) {
3050     for (i=0; i<A->rmap->n; i++) {
3051       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3052     }
3053   }
3054 
3055   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3056   if (idx) {
3057     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3058   }
3059   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3060   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3061 
3062   for (i=0; i<A->rmap->n; i++) {
3063     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3064       va[i] = vb[i];
3065       if (idx) idx[i] = a->garray[idxb[i]];
3066     }
3067   }
3068 
3069   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3070   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3071   ierr = PetscFree(idxb);CHKERRQ(ierr);
3072   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 #undef __FUNCT__
3077 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3078 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3079 {
3080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3081   PetscErrorCode ierr;
3082   PetscInt       i,*idxb = 0;
3083   PetscScalar    *va,*vb;
3084   Vec            vtmp;
3085 
3086   PetscFunctionBegin;
3087   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3088   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3089   if (idx) {
3090     for (i=0; i<A->cmap->n; i++) {
3091       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3092     }
3093   }
3094 
3095   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3096   if (idx) {
3097     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3098   }
3099   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3100   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3101 
3102   for (i=0; i<A->rmap->n; i++) {
3103     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3104       va[i] = vb[i];
3105       if (idx) idx[i] = a->garray[idxb[i]];
3106     }
3107   }
3108 
3109   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3110   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3111   ierr = PetscFree(idxb);CHKERRQ(ierr);
3112   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3118 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3119 {
3120   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3121   PetscInt       n      = A->rmap->n;
3122   PetscInt       cstart = A->cmap->rstart;
3123   PetscInt       *cmap  = mat->garray;
3124   PetscInt       *diagIdx, *offdiagIdx;
3125   Vec            diagV, offdiagV;
3126   PetscScalar    *a, *diagA, *offdiagA;
3127   PetscInt       r;
3128   PetscErrorCode ierr;
3129 
3130   PetscFunctionBegin;
3131   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3132   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3133   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3134   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3135   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3136   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3137   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3138   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3139   for (r = 0; r < n; ++r) {
3140     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3141       a[r]   = diagA[r];
3142       idx[r] = cstart + diagIdx[r];
3143     } else {
3144       a[r]   = offdiagA[r];
3145       idx[r] = cmap[offdiagIdx[r]];
3146     }
3147   }
3148   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3149   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3150   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3151   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3152   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3153   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 #undef __FUNCT__
3158 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3159 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3160 {
3161   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3162   PetscInt       n      = A->rmap->n;
3163   PetscInt       cstart = A->cmap->rstart;
3164   PetscInt       *cmap  = mat->garray;
3165   PetscInt       *diagIdx, *offdiagIdx;
3166   Vec            diagV, offdiagV;
3167   PetscScalar    *a, *diagA, *offdiagA;
3168   PetscInt       r;
3169   PetscErrorCode ierr;
3170 
3171   PetscFunctionBegin;
3172   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3173   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3174   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3175   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3176   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3177   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3178   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3179   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3180   for (r = 0; r < n; ++r) {
3181     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3182       a[r]   = diagA[r];
3183       idx[r] = cstart + diagIdx[r];
3184     } else {
3185       a[r]   = offdiagA[r];
3186       idx[r] = cmap[offdiagIdx[r]];
3187     }
3188   }
3189   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3190   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3191   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3192   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3193   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3194   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3200 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3201 {
3202   PetscErrorCode ierr;
3203   Mat            *dummy;
3204 
3205   PetscFunctionBegin;
3206   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3207   *newmat = *dummy;
3208   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3216 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3217 {
3218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3228 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3229 {
3230   PetscErrorCode ierr;
3231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3232 
3233   PetscFunctionBegin;
3234   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3235   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3236   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 /* -------------------------------------------------------------------*/
3242 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3243                                        MatGetRow_MPIAIJ,
3244                                        MatRestoreRow_MPIAIJ,
3245                                        MatMult_MPIAIJ,
3246                                 /* 4*/ MatMultAdd_MPIAIJ,
3247                                        MatMultTranspose_MPIAIJ,
3248                                        MatMultTransposeAdd_MPIAIJ,
3249 #if defined(PETSC_HAVE_PBGL)
3250                                        MatSolve_MPIAIJ,
3251 #else
3252                                        0,
3253 #endif
3254                                        0,
3255                                        0,
3256                                 /*10*/ 0,
3257                                        0,
3258                                        0,
3259                                        MatSOR_MPIAIJ,
3260                                        MatTranspose_MPIAIJ,
3261                                 /*15*/ MatGetInfo_MPIAIJ,
3262                                        MatEqual_MPIAIJ,
3263                                        MatGetDiagonal_MPIAIJ,
3264                                        MatDiagonalScale_MPIAIJ,
3265                                        MatNorm_MPIAIJ,
3266                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3267                                        MatAssemblyEnd_MPIAIJ,
3268                                        MatSetOption_MPIAIJ,
3269                                        MatZeroEntries_MPIAIJ,
3270                                 /*24*/ MatZeroRows_MPIAIJ,
3271                                        0,
3272 #if defined(PETSC_HAVE_PBGL)
3273                                        0,
3274 #else
3275                                        0,
3276 #endif
3277                                        0,
3278                                        0,
3279                                 /*29*/ MatSetUp_MPIAIJ,
3280 #if defined(PETSC_HAVE_PBGL)
3281                                        0,
3282 #else
3283                                        0,
3284 #endif
3285                                        0,
3286                                        0,
3287                                        0,
3288                                 /*34*/ MatDuplicate_MPIAIJ,
3289                                        0,
3290                                        0,
3291                                        0,
3292                                        0,
3293                                 /*39*/ MatAXPY_MPIAIJ,
3294                                        MatGetSubMatrices_MPIAIJ,
3295                                        MatIncreaseOverlap_MPIAIJ,
3296                                        MatGetValues_MPIAIJ,
3297                                        MatCopy_MPIAIJ,
3298                                 /*44*/ MatGetRowMax_MPIAIJ,
3299                                        MatScale_MPIAIJ,
3300                                        0,
3301                                        0,
3302                                        MatZeroRowsColumns_MPIAIJ,
3303                                 /*49*/ MatSetRandom_MPIAIJ,
3304                                        0,
3305                                        0,
3306                                        0,
3307                                        0,
3308                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3309                                        0,
3310                                        MatSetUnfactored_MPIAIJ,
3311                                        MatPermute_MPIAIJ,
3312                                        0,
3313                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3314                                        MatDestroy_MPIAIJ,
3315                                        MatView_MPIAIJ,
3316                                        0,
3317                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3318                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3319                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3320                                        0,
3321                                        0,
3322                                        0,
3323                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3324                                        MatGetRowMinAbs_MPIAIJ,
3325                                        0,
3326                                        MatSetColoring_MPIAIJ,
3327                                        0,
3328                                        MatSetValuesAdifor_MPIAIJ,
3329                                 /*75*/ MatFDColoringApply_AIJ,
3330                                        0,
3331                                        0,
3332                                        0,
3333                                        MatFindZeroDiagonals_MPIAIJ,
3334                                 /*80*/ 0,
3335                                        0,
3336                                        0,
3337                                 /*83*/ MatLoad_MPIAIJ,
3338                                        0,
3339                                        0,
3340                                        0,
3341                                        0,
3342                                        0,
3343                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3344                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3345                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3346                                        MatPtAP_MPIAIJ_MPIAIJ,
3347                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3348                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3349                                        0,
3350                                        0,
3351                                        0,
3352                                        0,
3353                                 /*99*/ 0,
3354                                        0,
3355                                        0,
3356                                        MatConjugate_MPIAIJ,
3357                                        0,
3358                                 /*104*/MatSetValuesRow_MPIAIJ,
3359                                        MatRealPart_MPIAIJ,
3360                                        MatImaginaryPart_MPIAIJ,
3361                                        0,
3362                                        0,
3363                                 /*109*/0,
3364                                        MatGetRedundantMatrix_MPIAIJ,
3365                                        MatGetRowMin_MPIAIJ,
3366                                        0,
3367                                        0,
3368                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3369                                        0,
3370                                        0,
3371                                        0,
3372                                        0,
3373                                 /*119*/0,
3374                                        0,
3375                                        0,
3376                                        0,
3377                                        MatGetMultiProcBlock_MPIAIJ,
3378                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3379                                        MatGetColumnNorms_MPIAIJ,
3380                                        MatInvertBlockDiagonal_MPIAIJ,
3381                                        0,
3382                                        MatGetSubMatricesParallel_MPIAIJ,
3383                                 /*129*/0,
3384                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3385                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3386                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3387                                        0,
3388                                 /*134*/0,
3389                                        0,
3390                                        0,
3391                                        0,
3392                                        0,
3393                                 /*139*/0,
3394                                        0
3395 };
3396 
3397 /* ----------------------------------------------------------------------------------------*/
3398 
3399 #undef __FUNCT__
3400 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3401 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3402 {
3403   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3404   PetscErrorCode ierr;
3405 
3406   PetscFunctionBegin;
3407   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3408   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3414 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3415 {
3416   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3417   PetscErrorCode ierr;
3418 
3419   PetscFunctionBegin;
3420   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3421   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3422   PetscFunctionReturn(0);
3423 }
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3427 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3428 {
3429   Mat_MPIAIJ     *b;
3430   PetscErrorCode ierr;
3431 
3432   PetscFunctionBegin;
3433   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3434   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3435   b = (Mat_MPIAIJ*)B->data;
3436 
3437   if (!B->preallocated) {
3438     /* Explicitly create 2 MATSEQAIJ matrices. */
3439     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3440     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3441     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3442     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3443     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3444     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3445     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3446     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3447     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3448     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3449   }
3450 
3451   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3452   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3453   B->preallocated = PETSC_TRUE;
3454   PetscFunctionReturn(0);
3455 }
3456 
3457 #undef __FUNCT__
3458 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3459 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3460 {
3461   Mat            mat;
3462   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3463   PetscErrorCode ierr;
3464 
3465   PetscFunctionBegin;
3466   *newmat = 0;
3467   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3468   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3469   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3470   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3471   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3472   a       = (Mat_MPIAIJ*)mat->data;
3473 
3474   mat->factortype   = matin->factortype;
3475   mat->rmap->bs     = matin->rmap->bs;
3476   mat->cmap->bs     = matin->cmap->bs;
3477   mat->assembled    = PETSC_TRUE;
3478   mat->insertmode   = NOT_SET_VALUES;
3479   mat->preallocated = PETSC_TRUE;
3480 
3481   a->size         = oldmat->size;
3482   a->rank         = oldmat->rank;
3483   a->donotstash   = oldmat->donotstash;
3484   a->roworiented  = oldmat->roworiented;
3485   a->rowindices   = 0;
3486   a->rowvalues    = 0;
3487   a->getrowactive = PETSC_FALSE;
3488 
3489   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3490   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3491 
3492   if (oldmat->colmap) {
3493 #if defined(PETSC_USE_CTABLE)
3494     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3495 #else
3496     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3497     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3498     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3499 #endif
3500   } else a->colmap = 0;
3501   if (oldmat->garray) {
3502     PetscInt len;
3503     len  = oldmat->B->cmap->n;
3504     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3505     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3506     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3507   } else a->garray = 0;
3508 
3509   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3510   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3511   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3512   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3513   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3514   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3515   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3516   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3517   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3518   *newmat = mat;
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 
3523 
3524 #undef __FUNCT__
3525 #define __FUNCT__ "MatLoad_MPIAIJ"
3526 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3527 {
3528   PetscScalar    *vals,*svals;
3529   MPI_Comm       comm;
3530   PetscErrorCode ierr;
3531   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3532   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3533   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3534   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3535   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3536   int            fd;
3537   PetscInt       bs = 1;
3538 
3539   PetscFunctionBegin;
3540   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3541   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3542   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3543   if (!rank) {
3544     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3545     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3546     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3547   }
3548 
3549   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3550   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3551   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3552 
3553   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3554 
3555   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3556   M    = header[1]; N = header[2];
3557   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3558   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3559   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3560 
3561   /* If global sizes are set, check if they are consistent with that given in the file */
3562   if (sizesset) {
3563     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3564   }
3565   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3566   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3567 
3568   /* determine ownership of all (block) rows */
3569   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3570   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3571   else m = newMat->rmap->n; /* Set by user */
3572 
3573   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3574   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3575 
3576   /* First process needs enough room for process with most rows */
3577   if (!rank) {
3578     mmax = rowners[1];
3579     for (i=2; i<=size; i++) {
3580       mmax = PetscMax(mmax, rowners[i]);
3581     }
3582   } else mmax = -1;             /* unused, but compilers complain */
3583 
3584   rowners[0] = 0;
3585   for (i=2; i<=size; i++) {
3586     rowners[i] += rowners[i-1];
3587   }
3588   rstart = rowners[rank];
3589   rend   = rowners[rank+1];
3590 
3591   /* distribute row lengths to all processors */
3592   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3593   if (!rank) {
3594     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3595     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3596     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3597     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3598     for (j=0; j<m; j++) {
3599       procsnz[0] += ourlens[j];
3600     }
3601     for (i=1; i<size; i++) {
3602       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3603       /* calculate the number of nonzeros on each processor */
3604       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3605         procsnz[i] += rowlengths[j];
3606       }
3607       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3608     }
3609     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3610   } else {
3611     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3612   }
3613 
3614   if (!rank) {
3615     /* determine max buffer needed and allocate it */
3616     maxnz = 0;
3617     for (i=0; i<size; i++) {
3618       maxnz = PetscMax(maxnz,procsnz[i]);
3619     }
3620     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3621 
3622     /* read in my part of the matrix column indices  */
3623     nz   = procsnz[0];
3624     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3625     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3626 
3627     /* read in every one elses and ship off */
3628     for (i=1; i<size; i++) {
3629       nz   = procsnz[i];
3630       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3631       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3632     }
3633     ierr = PetscFree(cols);CHKERRQ(ierr);
3634   } else {
3635     /* determine buffer space needed for message */
3636     nz = 0;
3637     for (i=0; i<m; i++) {
3638       nz += ourlens[i];
3639     }
3640     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3641 
3642     /* receive message of column indices*/
3643     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3644   }
3645 
3646   /* determine column ownership if matrix is not square */
3647   if (N != M) {
3648     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3649     else n = newMat->cmap->n;
3650     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3651     cstart = cend - n;
3652   } else {
3653     cstart = rstart;
3654     cend   = rend;
3655     n      = cend - cstart;
3656   }
3657 
3658   /* loop over local rows, determining number of off diagonal entries */
3659   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3660   jj   = 0;
3661   for (i=0; i<m; i++) {
3662     for (j=0; j<ourlens[i]; j++) {
3663       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3664       jj++;
3665     }
3666   }
3667 
3668   for (i=0; i<m; i++) {
3669     ourlens[i] -= offlens[i];
3670   }
3671   if (!sizesset) {
3672     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3673   }
3674 
3675   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3676 
3677   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3678 
3679   for (i=0; i<m; i++) {
3680     ourlens[i] += offlens[i];
3681   }
3682 
3683   if (!rank) {
3684     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3685 
3686     /* read in my part of the matrix numerical values  */
3687     nz   = procsnz[0];
3688     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3689 
3690     /* insert into matrix */
3691     jj      = rstart;
3692     smycols = mycols;
3693     svals   = vals;
3694     for (i=0; i<m; i++) {
3695       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3696       smycols += ourlens[i];
3697       svals   += ourlens[i];
3698       jj++;
3699     }
3700 
3701     /* read in other processors and ship out */
3702     for (i=1; i<size; i++) {
3703       nz   = procsnz[i];
3704       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3705       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3706     }
3707     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3708   } else {
3709     /* receive numeric values */
3710     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3711 
3712     /* receive message of values*/
3713     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3714 
3715     /* insert into matrix */
3716     jj      = rstart;
3717     smycols = mycols;
3718     svals   = vals;
3719     for (i=0; i<m; i++) {
3720       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3721       smycols += ourlens[i];
3722       svals   += ourlens[i];
3723       jj++;
3724     }
3725   }
3726   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3727   ierr = PetscFree(vals);CHKERRQ(ierr);
3728   ierr = PetscFree(mycols);CHKERRQ(ierr);
3729   ierr = PetscFree(rowners);CHKERRQ(ierr);
3730   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3732   PetscFunctionReturn(0);
3733 }
3734 
3735 #undef __FUNCT__
3736 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3737 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3738 {
3739   PetscErrorCode ierr;
3740   IS             iscol_local;
3741   PetscInt       csize;
3742 
3743   PetscFunctionBegin;
3744   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3745   if (call == MAT_REUSE_MATRIX) {
3746     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3747     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3748   } else {
3749     PetscInt cbs;
3750     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3751     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3752     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3753   }
3754   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3755   if (call == MAT_INITIAL_MATRIX) {
3756     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3757     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3758   }
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3765 /*
3766     Not great since it makes two copies of the submatrix, first an SeqAIJ
3767   in local and then by concatenating the local matrices the end result.
3768   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3769 
3770   Note: This requires a sequential iscol with all indices.
3771 */
3772 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3773 {
3774   PetscErrorCode ierr;
3775   PetscMPIInt    rank,size;
3776   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3777   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3778   PetscBool      allcolumns, colflag;
3779   Mat            M,Mreuse;
3780   MatScalar      *vwork,*aa;
3781   MPI_Comm       comm;
3782   Mat_SeqAIJ     *aij;
3783 
3784   PetscFunctionBegin;
3785   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3788 
3789   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3790   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3791   if (colflag && ncol == mat->cmap->N) {
3792     allcolumns = PETSC_TRUE;
3793   } else {
3794     allcolumns = PETSC_FALSE;
3795   }
3796   if (call ==  MAT_REUSE_MATRIX) {
3797     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3798     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3799     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3800   } else {
3801     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3802   }
3803 
3804   /*
3805       m - number of local rows
3806       n - number of columns (same on all processors)
3807       rstart - first row in new global matrix generated
3808   */
3809   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3810   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3811   if (call == MAT_INITIAL_MATRIX) {
3812     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3813     ii  = aij->i;
3814     jj  = aij->j;
3815 
3816     /*
3817         Determine the number of non-zeros in the diagonal and off-diagonal
3818         portions of the matrix in order to do correct preallocation
3819     */
3820 
3821     /* first get start and end of "diagonal" columns */
3822     if (csize == PETSC_DECIDE) {
3823       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3824       if (mglobal == n) { /* square matrix */
3825         nlocal = m;
3826       } else {
3827         nlocal = n/size + ((n % size) > rank);
3828       }
3829     } else {
3830       nlocal = csize;
3831     }
3832     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3833     rstart = rend - nlocal;
3834     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3835 
3836     /* next, compute all the lengths */
3837     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3838     olens = dlens + m;
3839     for (i=0; i<m; i++) {
3840       jend = ii[i+1] - ii[i];
3841       olen = 0;
3842       dlen = 0;
3843       for (j=0; j<jend; j++) {
3844         if (*jj < rstart || *jj >= rend) olen++;
3845         else dlen++;
3846         jj++;
3847       }
3848       olens[i] = olen;
3849       dlens[i] = dlen;
3850     }
3851     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3852     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3853     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3854     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3855     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3856     ierr = PetscFree(dlens);CHKERRQ(ierr);
3857   } else {
3858     PetscInt ml,nl;
3859 
3860     M    = *newmat;
3861     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3862     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3863     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3864     /*
3865          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3866        rather than the slower MatSetValues().
3867     */
3868     M->was_assembled = PETSC_TRUE;
3869     M->assembled     = PETSC_FALSE;
3870   }
3871   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3872   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3873   ii   = aij->i;
3874   jj   = aij->j;
3875   aa   = aij->a;
3876   for (i=0; i<m; i++) {
3877     row   = rstart + i;
3878     nz    = ii[i+1] - ii[i];
3879     cwork = jj;     jj += nz;
3880     vwork = aa;     aa += nz;
3881     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3882   }
3883 
3884   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   *newmat = M;
3887 
3888   /* save submatrix used in processor for next request */
3889   if (call ==  MAT_INITIAL_MATRIX) {
3890     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3891     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3892   }
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3898 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3899 {
3900   PetscInt       m,cstart, cend,j,nnz,i,d;
3901   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3902   const PetscInt *JJ;
3903   PetscScalar    *values;
3904   PetscErrorCode ierr;
3905 
3906   PetscFunctionBegin;
3907   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3908 
3909   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3910   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3911   m      = B->rmap->n;
3912   cstart = B->cmap->rstart;
3913   cend   = B->cmap->rend;
3914   rstart = B->rmap->rstart;
3915 
3916   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3917 
3918 #if defined(PETSC_USE_DEBUGGING)
3919   for (i=0; i<m; i++) {
3920     nnz = Ii[i+1]- Ii[i];
3921     JJ  = J + Ii[i];
3922     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3923     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3924     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3925   }
3926 #endif
3927 
3928   for (i=0; i<m; i++) {
3929     nnz     = Ii[i+1]- Ii[i];
3930     JJ      = J + Ii[i];
3931     nnz_max = PetscMax(nnz_max,nnz);
3932     d       = 0;
3933     for (j=0; j<nnz; j++) {
3934       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3935     }
3936     d_nnz[i] = d;
3937     o_nnz[i] = nnz - d;
3938   }
3939   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3940   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3941 
3942   if (v) values = (PetscScalar*)v;
3943   else {
3944     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3945     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3946   }
3947 
3948   for (i=0; i<m; i++) {
3949     ii   = i + rstart;
3950     nnz  = Ii[i+1]- Ii[i];
3951     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3952   }
3953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955 
3956   if (!v) {
3957     ierr = PetscFree(values);CHKERRQ(ierr);
3958   }
3959   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3960   PetscFunctionReturn(0);
3961 }
3962 
3963 #undef __FUNCT__
3964 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3965 /*@
3966    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3967    (the default parallel PETSc format).
3968 
3969    Collective on MPI_Comm
3970 
3971    Input Parameters:
3972 +  B - the matrix
3973 .  i - the indices into j for the start of each local row (starts with zero)
3974 .  j - the column indices for each local row (starts with zero)
3975 -  v - optional values in the matrix
3976 
3977    Level: developer
3978 
3979    Notes:
3980        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3981      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3982      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3983 
3984        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3985 
3986        The format which is used for the sparse matrix input, is equivalent to a
3987     row-major ordering.. i.e for the following matrix, the input data expected is
3988     as shown:
3989 
3990         1 0 0
3991         2 0 3     P0
3992        -------
3993         4 5 6     P1
3994 
3995      Process0 [P0]: rows_owned=[0,1]
3996         i =  {0,1,3}  [size = nrow+1  = 2+1]
3997         j =  {0,0,2}  [size = nz = 6]
3998         v =  {1,2,3}  [size = nz = 6]
3999 
4000      Process1 [P1]: rows_owned=[2]
4001         i =  {0,3}    [size = nrow+1  = 1+1]
4002         j =  {0,1,2}  [size = nz = 6]
4003         v =  {4,5,6}  [size = nz = 6]
4004 
4005 .keywords: matrix, aij, compressed row, sparse, parallel
4006 
4007 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4008           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4009 @*/
4010 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4011 {
4012   PetscErrorCode ierr;
4013 
4014   PetscFunctionBegin;
4015   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4021 /*@C
4022    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4023    (the default parallel PETSc format).  For good matrix assembly performance
4024    the user should preallocate the matrix storage by setting the parameters
4025    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4026    performance can be increased by more than a factor of 50.
4027 
4028    Collective on MPI_Comm
4029 
4030    Input Parameters:
4031 +  A - the matrix
4032 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4033            (same value is used for all local rows)
4034 .  d_nnz - array containing the number of nonzeros in the various rows of the
4035            DIAGONAL portion of the local submatrix (possibly different for each row)
4036            or NULL, if d_nz is used to specify the nonzero structure.
4037            The size of this array is equal to the number of local rows, i.e 'm'.
4038            For matrices that will be factored, you must leave room for (and set)
4039            the diagonal entry even if it is zero.
4040 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4041            submatrix (same value is used for all local rows).
4042 -  o_nnz - array containing the number of nonzeros in the various rows of the
4043            OFF-DIAGONAL portion of the local submatrix (possibly different for
4044            each row) or NULL, if o_nz is used to specify the nonzero
4045            structure. The size of this array is equal to the number
4046            of local rows, i.e 'm'.
4047 
4048    If the *_nnz parameter is given then the *_nz parameter is ignored
4049 
4050    The AIJ format (also called the Yale sparse matrix format or
4051    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4052    storage.  The stored row and column indices begin with zero.
4053    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4054 
4055    The parallel matrix is partitioned such that the first m0 rows belong to
4056    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4057    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4058 
4059    The DIAGONAL portion of the local submatrix of a processor can be defined
4060    as the submatrix which is obtained by extraction the part corresponding to
4061    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4062    first row that belongs to the processor, r2 is the last row belonging to
4063    the this processor, and c1-c2 is range of indices of the local part of a
4064    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4065    common case of a square matrix, the row and column ranges are the same and
4066    the DIAGONAL part is also square. The remaining portion of the local
4067    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4068 
4069    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4070 
4071    You can call MatGetInfo() to get information on how effective the preallocation was;
4072    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4073    You can also run with the option -info and look for messages with the string
4074    malloc in them to see if additional memory allocation was needed.
4075 
4076    Example usage:
4077 
4078    Consider the following 8x8 matrix with 34 non-zero values, that is
4079    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4080    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4081    as follows:
4082 
4083 .vb
4084             1  2  0  |  0  3  0  |  0  4
4085     Proc0   0  5  6  |  7  0  0  |  8  0
4086             9  0 10  | 11  0  0  | 12  0
4087     -------------------------------------
4088            13  0 14  | 15 16 17  |  0  0
4089     Proc1   0 18  0  | 19 20 21  |  0  0
4090             0  0  0  | 22 23  0  | 24  0
4091     -------------------------------------
4092     Proc2  25 26 27  |  0  0 28  | 29  0
4093            30  0  0  | 31 32 33  |  0 34
4094 .ve
4095 
4096    This can be represented as a collection of submatrices as:
4097 
4098 .vb
4099       A B C
4100       D E F
4101       G H I
4102 .ve
4103 
4104    Where the submatrices A,B,C are owned by proc0, D,E,F are
4105    owned by proc1, G,H,I are owned by proc2.
4106 
4107    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4108    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4109    The 'M','N' parameters are 8,8, and have the same values on all procs.
4110 
4111    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4112    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4113    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4114    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4115    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4116    matrix, ans [DF] as another SeqAIJ matrix.
4117 
4118    When d_nz, o_nz parameters are specified, d_nz storage elements are
4119    allocated for every row of the local diagonal submatrix, and o_nz
4120    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4121    One way to choose d_nz and o_nz is to use the max nonzerors per local
4122    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4123    In this case, the values of d_nz,o_nz are:
4124 .vb
4125      proc0 : dnz = 2, o_nz = 2
4126      proc1 : dnz = 3, o_nz = 2
4127      proc2 : dnz = 1, o_nz = 4
4128 .ve
4129    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4130    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4131    for proc3. i.e we are using 12+15+10=37 storage locations to store
4132    34 values.
4133 
4134    When d_nnz, o_nnz parameters are specified, the storage is specified
4135    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4136    In the above case the values for d_nnz,o_nnz are:
4137 .vb
4138      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4139      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4140      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4141 .ve
4142    Here the space allocated is sum of all the above values i.e 34, and
4143    hence pre-allocation is perfect.
4144 
4145    Level: intermediate
4146 
4147 .keywords: matrix, aij, compressed row, sparse, parallel
4148 
4149 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4150           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4151 @*/
4152 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4153 {
4154   PetscErrorCode ierr;
4155 
4156   PetscFunctionBegin;
4157   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4158   PetscValidType(B,1);
4159   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4165 /*@
4166      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4167          CSR format the local rows.
4168 
4169    Collective on MPI_Comm
4170 
4171    Input Parameters:
4172 +  comm - MPI communicator
4173 .  m - number of local rows (Cannot be PETSC_DECIDE)
4174 .  n - This value should be the same as the local size used in creating the
4175        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4176        calculated if N is given) For square matrices n is almost always m.
4177 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4178 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4179 .   i - row indices
4180 .   j - column indices
4181 -   a - matrix values
4182 
4183    Output Parameter:
4184 .   mat - the matrix
4185 
4186    Level: intermediate
4187 
4188    Notes:
4189        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4190      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4191      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4192 
4193        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4194 
4195        The format which is used for the sparse matrix input, is equivalent to a
4196     row-major ordering.. i.e for the following matrix, the input data expected is
4197     as shown:
4198 
4199         1 0 0
4200         2 0 3     P0
4201        -------
4202         4 5 6     P1
4203 
4204      Process0 [P0]: rows_owned=[0,1]
4205         i =  {0,1,3}  [size = nrow+1  = 2+1]
4206         j =  {0,0,2}  [size = nz = 6]
4207         v =  {1,2,3}  [size = nz = 6]
4208 
4209      Process1 [P1]: rows_owned=[2]
4210         i =  {0,3}    [size = nrow+1  = 1+1]
4211         j =  {0,1,2}  [size = nz = 6]
4212         v =  {4,5,6}  [size = nz = 6]
4213 
4214 .keywords: matrix, aij, compressed row, sparse, parallel
4215 
4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4217           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4218 @*/
4219 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4220 {
4221   PetscErrorCode ierr;
4222 
4223   PetscFunctionBegin;
4224   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4225   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4226   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4227   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4228   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4229   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4230   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4231   PetscFunctionReturn(0);
4232 }
4233 
4234 #undef __FUNCT__
4235 #define __FUNCT__ "MatCreateAIJ"
4236 /*@C
4237    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4238    (the default parallel PETSc format).  For good matrix assembly performance
4239    the user should preallocate the matrix storage by setting the parameters
4240    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4241    performance can be increased by more than a factor of 50.
4242 
4243    Collective on MPI_Comm
4244 
4245    Input Parameters:
4246 +  comm - MPI communicator
4247 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4248            This value should be the same as the local size used in creating the
4249            y vector for the matrix-vector product y = Ax.
4250 .  n - This value should be the same as the local size used in creating the
4251        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4252        calculated if N is given) For square matrices n is almost always m.
4253 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4254 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4255 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4256            (same value is used for all local rows)
4257 .  d_nnz - array containing the number of nonzeros in the various rows of the
4258            DIAGONAL portion of the local submatrix (possibly different for each row)
4259            or NULL, if d_nz is used to specify the nonzero structure.
4260            The size of this array is equal to the number of local rows, i.e 'm'.
4261 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4262            submatrix (same value is used for all local rows).
4263 -  o_nnz - array containing the number of nonzeros in the various rows of the
4264            OFF-DIAGONAL portion of the local submatrix (possibly different for
4265            each row) or NULL, if o_nz is used to specify the nonzero
4266            structure. The size of this array is equal to the number
4267            of local rows, i.e 'm'.
4268 
4269    Output Parameter:
4270 .  A - the matrix
4271 
4272    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4273    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4274    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4275 
4276    Notes:
4277    If the *_nnz parameter is given then the *_nz parameter is ignored
4278 
4279    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4280    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4281    storage requirements for this matrix.
4282 
4283    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4284    processor than it must be used on all processors that share the object for
4285    that argument.
4286 
4287    The user MUST specify either the local or global matrix dimensions
4288    (possibly both).
4289 
4290    The parallel matrix is partitioned across processors such that the
4291    first m0 rows belong to process 0, the next m1 rows belong to
4292    process 1, the next m2 rows belong to process 2 etc.. where
4293    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4294    values corresponding to [m x N] submatrix.
4295 
4296    The columns are logically partitioned with the n0 columns belonging
4297    to 0th partition, the next n1 columns belonging to the next
4298    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4299 
4300    The DIAGONAL portion of the local submatrix on any given processor
4301    is the submatrix corresponding to the rows and columns m,n
4302    corresponding to the given processor. i.e diagonal matrix on
4303    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4304    etc. The remaining portion of the local submatrix [m x (N-n)]
4305    constitute the OFF-DIAGONAL portion. The example below better
4306    illustrates this concept.
4307 
4308    For a square global matrix we define each processor's diagonal portion
4309    to be its local rows and the corresponding columns (a square submatrix);
4310    each processor's off-diagonal portion encompasses the remainder of the
4311    local matrix (a rectangular submatrix).
4312 
4313    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4314 
4315    When calling this routine with a single process communicator, a matrix of
4316    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4317    type of communicator, use the construction mechanism:
4318      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4319 
4320    By default, this format uses inodes (identical nodes) when possible.
4321    We search for consecutive rows with the same nonzero structure, thereby
4322    reusing matrix information to achieve increased efficiency.
4323 
4324    Options Database Keys:
4325 +  -mat_no_inode  - Do not use inodes
4326 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4327 -  -mat_aij_oneindex - Internally use indexing starting at 1
4328         rather than 0.  Note that when calling MatSetValues(),
4329         the user still MUST index entries starting at 0!
4330 
4331 
4332    Example usage:
4333 
4334    Consider the following 8x8 matrix with 34 non-zero values, that is
4335    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4336    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4337    as follows:
4338 
4339 .vb
4340             1  2  0  |  0  3  0  |  0  4
4341     Proc0   0  5  6  |  7  0  0  |  8  0
4342             9  0 10  | 11  0  0  | 12  0
4343     -------------------------------------
4344            13  0 14  | 15 16 17  |  0  0
4345     Proc1   0 18  0  | 19 20 21  |  0  0
4346             0  0  0  | 22 23  0  | 24  0
4347     -------------------------------------
4348     Proc2  25 26 27  |  0  0 28  | 29  0
4349            30  0  0  | 31 32 33  |  0 34
4350 .ve
4351 
4352    This can be represented as a collection of submatrices as:
4353 
4354 .vb
4355       A B C
4356       D E F
4357       G H I
4358 .ve
4359 
4360    Where the submatrices A,B,C are owned by proc0, D,E,F are
4361    owned by proc1, G,H,I are owned by proc2.
4362 
4363    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4364    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4365    The 'M','N' parameters are 8,8, and have the same values on all procs.
4366 
4367    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4368    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4369    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4370    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4371    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4372    matrix, ans [DF] as another SeqAIJ matrix.
4373 
4374    When d_nz, o_nz parameters are specified, d_nz storage elements are
4375    allocated for every row of the local diagonal submatrix, and o_nz
4376    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4377    One way to choose d_nz and o_nz is to use the max nonzerors per local
4378    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4379    In this case, the values of d_nz,o_nz are:
4380 .vb
4381      proc0 : dnz = 2, o_nz = 2
4382      proc1 : dnz = 3, o_nz = 2
4383      proc2 : dnz = 1, o_nz = 4
4384 .ve
4385    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4386    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4387    for proc3. i.e we are using 12+15+10=37 storage locations to store
4388    34 values.
4389 
4390    When d_nnz, o_nnz parameters are specified, the storage is specified
4391    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4392    In the above case the values for d_nnz,o_nnz are:
4393 .vb
4394      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4395      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4396      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4397 .ve
4398    Here the space allocated is sum of all the above values i.e 34, and
4399    hence pre-allocation is perfect.
4400 
4401    Level: intermediate
4402 
4403 .keywords: matrix, aij, compressed row, sparse, parallel
4404 
4405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4406           MPIAIJ, MatCreateMPIAIJWithArrays()
4407 @*/
4408 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4409 {
4410   PetscErrorCode ierr;
4411   PetscMPIInt    size;
4412 
4413   PetscFunctionBegin;
4414   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4415   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4417   if (size > 1) {
4418     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4419     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4420   } else {
4421     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4422     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4423   }
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4429 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4430 {
4431   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4432 
4433   PetscFunctionBegin;
4434   *Ad     = a->A;
4435   *Ao     = a->B;
4436   *colmap = a->garray;
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 #undef __FUNCT__
4441 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4442 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4443 {
4444   PetscErrorCode ierr;
4445   PetscInt       i;
4446   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4447 
4448   PetscFunctionBegin;
4449   if (coloring->ctype == IS_COLORING_GLOBAL) {
4450     ISColoringValue *allcolors,*colors;
4451     ISColoring      ocoloring;
4452 
4453     /* set coloring for diagonal portion */
4454     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4455 
4456     /* set coloring for off-diagonal portion */
4457     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4458     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4459     for (i=0; i<a->B->cmap->n; i++) {
4460       colors[i] = allcolors[a->garray[i]];
4461     }
4462     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4463     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4464     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4465     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4466   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4467     ISColoringValue *colors;
4468     PetscInt        *larray;
4469     ISColoring      ocoloring;
4470 
4471     /* set coloring for diagonal portion */
4472     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4473     for (i=0; i<a->A->cmap->n; i++) {
4474       larray[i] = i + A->cmap->rstart;
4475     }
4476     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4477     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4478     for (i=0; i<a->A->cmap->n; i++) {
4479       colors[i] = coloring->colors[larray[i]];
4480     }
4481     ierr = PetscFree(larray);CHKERRQ(ierr);
4482     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4483     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4484     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4485 
4486     /* set coloring for off-diagonal portion */
4487     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4488     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4489     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4490     for (i=0; i<a->B->cmap->n; i++) {
4491       colors[i] = coloring->colors[larray[i]];
4492     }
4493     ierr = PetscFree(larray);CHKERRQ(ierr);
4494     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4495     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4496     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4497   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 #undef __FUNCT__
4502 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4503 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4504 {
4505   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4506   PetscErrorCode ierr;
4507 
4508   PetscFunctionBegin;
4509   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4510   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4511   PetscFunctionReturn(0);
4512 }
4513 
4514 #undef __FUNCT__
4515 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4516 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4517 {
4518   PetscErrorCode ierr;
4519   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4520   PetscInt       *indx;
4521 
4522   PetscFunctionBegin;
4523   /* This routine will ONLY return MPIAIJ type matrix */
4524   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4525   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4526   if (n == PETSC_DECIDE) {
4527     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4528   }
4529   /* Check sum(n) = N */
4530   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4531   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4532 
4533   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4534   rstart -= m;
4535 
4536   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4537   for (i=0; i<m; i++) {
4538     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4539     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4540     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4541   }
4542 
4543   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4544   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4545   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4546   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4547   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4548   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 #undef __FUNCT__
4553 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4554 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4555 {
4556   PetscErrorCode ierr;
4557   PetscInt       m,N,i,rstart,nnz,Ii;
4558   PetscInt       *indx;
4559   PetscScalar    *values;
4560 
4561   PetscFunctionBegin;
4562   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4563   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4564   for (i=0; i<m; i++) {
4565     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4566     Ii   = i + rstart;
4567     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4568     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4569   }
4570   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4571   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4577 /*@
4578       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4579                  matrices from each processor
4580 
4581     Collective on MPI_Comm
4582 
4583    Input Parameters:
4584 +    comm - the communicators the parallel matrix will live on
4585 .    inmat - the input sequential matrices
4586 .    n - number of local columns (or PETSC_DECIDE)
4587 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4588 
4589    Output Parameter:
4590 .    outmat - the parallel matrix generated
4591 
4592     Level: advanced
4593 
4594    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4595 
4596 @*/
4597 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4598 {
4599   PetscErrorCode ierr;
4600 
4601   PetscFunctionBegin;
4602   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4603   if (scall == MAT_INITIAL_MATRIX) {
4604     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4605   }
4606   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4607   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4608   PetscFunctionReturn(0);
4609 }
4610 
4611 #undef __FUNCT__
4612 #define __FUNCT__ "MatFileSplit"
4613 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4614 {
4615   PetscErrorCode    ierr;
4616   PetscMPIInt       rank;
4617   PetscInt          m,N,i,rstart,nnz;
4618   size_t            len;
4619   const PetscInt    *indx;
4620   PetscViewer       out;
4621   char              *name;
4622   Mat               B;
4623   const PetscScalar *values;
4624 
4625   PetscFunctionBegin;
4626   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4627   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4628   /* Should this be the type of the diagonal block of A? */
4629   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4630   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4631   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4632   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4633   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4634   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4635   for (i=0; i<m; i++) {
4636     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4637     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4638     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4639   }
4640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4642 
4643   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4644   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4645   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4646   sprintf(name,"%s.%d",outfile,rank);
4647   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4648   ierr = PetscFree(name);CHKERRQ(ierr);
4649   ierr = MatView(B,out);CHKERRQ(ierr);
4650   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4651   ierr = MatDestroy(&B);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
4654 
4655 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4656 #undef __FUNCT__
4657 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4658 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4659 {
4660   PetscErrorCode      ierr;
4661   Mat_Merge_SeqsToMPI *merge;
4662   PetscContainer      container;
4663 
4664   PetscFunctionBegin;
4665   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4666   if (container) {
4667     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4670     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4671     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4672     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4673     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4674     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4675     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4676     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4677     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4678     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4679     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4680     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4681     ierr = PetscFree(merge);CHKERRQ(ierr);
4682     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4683   }
4684   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 #include <../src/mat/utils/freespace.h>
4689 #include <petscbt.h>
4690 
4691 #undef __FUNCT__
4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4693 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4694 {
4695   PetscErrorCode      ierr;
4696   MPI_Comm            comm;
4697   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4698   PetscMPIInt         size,rank,taga,*len_s;
4699   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4700   PetscInt            proc,m;
4701   PetscInt            **buf_ri,**buf_rj;
4702   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4703   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4704   MPI_Request         *s_waits,*r_waits;
4705   MPI_Status          *status;
4706   MatScalar           *aa=a->a;
4707   MatScalar           **abuf_r,*ba_i;
4708   Mat_Merge_SeqsToMPI *merge;
4709   PetscContainer      container;
4710 
4711   PetscFunctionBegin;
4712   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4713   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4714 
4715   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4716   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4717 
4718   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4719   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4720 
4721   bi     = merge->bi;
4722   bj     = merge->bj;
4723   buf_ri = merge->buf_ri;
4724   buf_rj = merge->buf_rj;
4725 
4726   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4727   owners = merge->rowmap->range;
4728   len_s  = merge->len_s;
4729 
4730   /* send and recv matrix values */
4731   /*-----------------------------*/
4732   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4733   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4734 
4735   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4736   for (proc=0,k=0; proc<size; proc++) {
4737     if (!len_s[proc]) continue;
4738     i    = owners[proc];
4739     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4740     k++;
4741   }
4742 
4743   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4744   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4745   ierr = PetscFree(status);CHKERRQ(ierr);
4746 
4747   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4748   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4749 
4750   /* insert mat values of mpimat */
4751   /*----------------------------*/
4752   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4753   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4754 
4755   for (k=0; k<merge->nrecv; k++) {
4756     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4757     nrows       = *(buf_ri_k[k]);
4758     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4759     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4760   }
4761 
4762   /* set values of ba */
4763   m = merge->rowmap->n;
4764   for (i=0; i<m; i++) {
4765     arow = owners[rank] + i;
4766     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4767     bnzi = bi[i+1] - bi[i];
4768     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4769 
4770     /* add local non-zero vals of this proc's seqmat into ba */
4771     anzi   = ai[arow+1] - ai[arow];
4772     aj     = a->j + ai[arow];
4773     aa     = a->a + ai[arow];
4774     nextaj = 0;
4775     for (j=0; nextaj<anzi; j++) {
4776       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4777         ba_i[j] += aa[nextaj++];
4778       }
4779     }
4780 
4781     /* add received vals into ba */
4782     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4783       /* i-th row */
4784       if (i == *nextrow[k]) {
4785         anzi   = *(nextai[k]+1) - *nextai[k];
4786         aj     = buf_rj[k] + *(nextai[k]);
4787         aa     = abuf_r[k] + *(nextai[k]);
4788         nextaj = 0;
4789         for (j=0; nextaj<anzi; j++) {
4790           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4791             ba_i[j] += aa[nextaj++];
4792           }
4793         }
4794         nextrow[k]++; nextai[k]++;
4795       }
4796     }
4797     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4798   }
4799   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4800   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4801 
4802   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4803   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4804   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4805   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4806   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809 
4810 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4811 
4812 #undef __FUNCT__
4813 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4814 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4815 {
4816   PetscErrorCode      ierr;
4817   Mat                 B_mpi;
4818   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4819   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4820   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4821   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4822   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4823   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4824   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4825   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4826   MPI_Status          *status;
4827   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4828   PetscBT             lnkbt;
4829   Mat_Merge_SeqsToMPI *merge;
4830   PetscContainer      container;
4831 
4832   PetscFunctionBegin;
4833   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4834 
4835   /* make sure it is a PETSc comm */
4836   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4838   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4839 
4840   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4841   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4842 
4843   /* determine row ownership */
4844   /*---------------------------------------------------------*/
4845   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4846   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4847   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4848   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4849   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4850   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4851   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4852 
4853   m      = merge->rowmap->n;
4854   owners = merge->rowmap->range;
4855 
4856   /* determine the number of messages to send, their lengths */
4857   /*---------------------------------------------------------*/
4858   len_s = merge->len_s;
4859 
4860   len          = 0; /* length of buf_si[] */
4861   merge->nsend = 0;
4862   for (proc=0; proc<size; proc++) {
4863     len_si[proc] = 0;
4864     if (proc == rank) {
4865       len_s[proc] = 0;
4866     } else {
4867       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4868       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4869     }
4870     if (len_s[proc]) {
4871       merge->nsend++;
4872       nrows = 0;
4873       for (i=owners[proc]; i<owners[proc+1]; i++) {
4874         if (ai[i+1] > ai[i]) nrows++;
4875       }
4876       len_si[proc] = 2*(nrows+1);
4877       len         += len_si[proc];
4878     }
4879   }
4880 
4881   /* determine the number and length of messages to receive for ij-structure */
4882   /*-------------------------------------------------------------------------*/
4883   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4884   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4885 
4886   /* post the Irecv of j-structure */
4887   /*-------------------------------*/
4888   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4889   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4890 
4891   /* post the Isend of j-structure */
4892   /*--------------------------------*/
4893   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4894 
4895   for (proc=0, k=0; proc<size; proc++) {
4896     if (!len_s[proc]) continue;
4897     i    = owners[proc];
4898     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4899     k++;
4900   }
4901 
4902   /* receives and sends of j-structure are complete */
4903   /*------------------------------------------------*/
4904   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4905   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4906 
4907   /* send and recv i-structure */
4908   /*---------------------------*/
4909   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4910   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4911 
4912   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4913   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4914   for (proc=0,k=0; proc<size; proc++) {
4915     if (!len_s[proc]) continue;
4916     /* form outgoing message for i-structure:
4917          buf_si[0]:                 nrows to be sent
4918                [1:nrows]:           row index (global)
4919                [nrows+1:2*nrows+1]: i-structure index
4920     */
4921     /*-------------------------------------------*/
4922     nrows       = len_si[proc]/2 - 1;
4923     buf_si_i    = buf_si + nrows+1;
4924     buf_si[0]   = nrows;
4925     buf_si_i[0] = 0;
4926     nrows       = 0;
4927     for (i=owners[proc]; i<owners[proc+1]; i++) {
4928       anzi = ai[i+1] - ai[i];
4929       if (anzi) {
4930         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4931         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4932         nrows++;
4933       }
4934     }
4935     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4936     k++;
4937     buf_si += len_si[proc];
4938   }
4939 
4940   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4941   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4942 
4943   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4944   for (i=0; i<merge->nrecv; i++) {
4945     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4946   }
4947 
4948   ierr = PetscFree(len_si);CHKERRQ(ierr);
4949   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4950   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4951   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4952   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4953   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4954   ierr = PetscFree(status);CHKERRQ(ierr);
4955 
4956   /* compute a local seq matrix in each processor */
4957   /*----------------------------------------------*/
4958   /* allocate bi array and free space for accumulating nonzero column info */
4959   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4960   bi[0] = 0;
4961 
4962   /* create and initialize a linked list */
4963   nlnk = N+1;
4964   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4965 
4966   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4967   len  = ai[owners[rank+1]] - ai[owners[rank]];
4968   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4969 
4970   current_space = free_space;
4971 
4972   /* determine symbolic info for each local row */
4973   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4974 
4975   for (k=0; k<merge->nrecv; k++) {
4976     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4977     nrows       = *buf_ri_k[k];
4978     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4979     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4980   }
4981 
4982   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4983   len  = 0;
4984   for (i=0; i<m; i++) {
4985     bnzi = 0;
4986     /* add local non-zero cols of this proc's seqmat into lnk */
4987     arow  = owners[rank] + i;
4988     anzi  = ai[arow+1] - ai[arow];
4989     aj    = a->j + ai[arow];
4990     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4991     bnzi += nlnk;
4992     /* add received col data into lnk */
4993     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4994       if (i == *nextrow[k]) { /* i-th row */
4995         anzi  = *(nextai[k]+1) - *nextai[k];
4996         aj    = buf_rj[k] + *nextai[k];
4997         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4998         bnzi += nlnk;
4999         nextrow[k]++; nextai[k]++;
5000       }
5001     }
5002     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5003 
5004     /* if free space is not available, make more free space */
5005     if (current_space->local_remaining<bnzi) {
5006       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5007       nspacedouble++;
5008     }
5009     /* copy data into free space, then initialize lnk */
5010     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5011     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5012 
5013     current_space->array           += bnzi;
5014     current_space->local_used      += bnzi;
5015     current_space->local_remaining -= bnzi;
5016 
5017     bi[i+1] = bi[i] + bnzi;
5018   }
5019 
5020   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5021 
5022   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5023   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5024   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5025 
5026   /* create symbolic parallel matrix B_mpi */
5027   /*---------------------------------------*/
5028   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5029   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5030   if (n==PETSC_DECIDE) {
5031     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5032   } else {
5033     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5034   }
5035   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5036   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5037   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5038   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5039   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5040 
5041   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5042   B_mpi->assembled    = PETSC_FALSE;
5043   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5044   merge->bi           = bi;
5045   merge->bj           = bj;
5046   merge->buf_ri       = buf_ri;
5047   merge->buf_rj       = buf_rj;
5048   merge->coi          = NULL;
5049   merge->coj          = NULL;
5050   merge->owners_co    = NULL;
5051 
5052   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5053 
5054   /* attach the supporting struct to B_mpi for reuse */
5055   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5056   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5057   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5058   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5059   *mpimat = B_mpi;
5060 
5061   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 #undef __FUNCT__
5066 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5067 /*@C
5068       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5069                  matrices from each processor
5070 
5071     Collective on MPI_Comm
5072 
5073    Input Parameters:
5074 +    comm - the communicators the parallel matrix will live on
5075 .    seqmat - the input sequential matrices
5076 .    m - number of local rows (or PETSC_DECIDE)
5077 .    n - number of local columns (or PETSC_DECIDE)
5078 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5079 
5080    Output Parameter:
5081 .    mpimat - the parallel matrix generated
5082 
5083     Level: advanced
5084 
5085    Notes:
5086      The dimensions of the sequential matrix in each processor MUST be the same.
5087      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5088      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5089 @*/
5090 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5091 {
5092   PetscErrorCode ierr;
5093   PetscMPIInt    size;
5094 
5095   PetscFunctionBegin;
5096   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5097   if (size == 1) {
5098     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5099     if (scall == MAT_INITIAL_MATRIX) {
5100       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5101     } else {
5102       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5103     }
5104     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5105     PetscFunctionReturn(0);
5106   }
5107   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5108   if (scall == MAT_INITIAL_MATRIX) {
5109     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5110   }
5111   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5112   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5113   PetscFunctionReturn(0);
5114 }
5115 
5116 #undef __FUNCT__
5117 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5118 /*@
5119      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5120           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5121           with MatGetSize()
5122 
5123     Not Collective
5124 
5125    Input Parameters:
5126 +    A - the matrix
5127 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5128 
5129    Output Parameter:
5130 .    A_loc - the local sequential matrix generated
5131 
5132     Level: developer
5133 
5134 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5135 
5136 @*/
5137 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5138 {
5139   PetscErrorCode ierr;
5140   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5141   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5142   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5143   MatScalar      *aa=a->a,*ba=b->a,*cam;
5144   PetscScalar    *ca;
5145   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5146   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5147   PetscBool      match;
5148 
5149   PetscFunctionBegin;
5150   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5151   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5152   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5153   if (scall == MAT_INITIAL_MATRIX) {
5154     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5155     ci[0] = 0;
5156     for (i=0; i<am; i++) {
5157       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5158     }
5159     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5160     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5161     k    = 0;
5162     for (i=0; i<am; i++) {
5163       ncols_o = bi[i+1] - bi[i];
5164       ncols_d = ai[i+1] - ai[i];
5165       /* off-diagonal portion of A */
5166       for (jo=0; jo<ncols_o; jo++) {
5167         col = cmap[*bj];
5168         if (col >= cstart) break;
5169         cj[k]   = col; bj++;
5170         ca[k++] = *ba++;
5171       }
5172       /* diagonal portion of A */
5173       for (j=0; j<ncols_d; j++) {
5174         cj[k]   = cstart + *aj++;
5175         ca[k++] = *aa++;
5176       }
5177       /* off-diagonal portion of A */
5178       for (j=jo; j<ncols_o; j++) {
5179         cj[k]   = cmap[*bj++];
5180         ca[k++] = *ba++;
5181       }
5182     }
5183     /* put together the new matrix */
5184     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5185     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5186     /* Since these are PETSc arrays, change flags to free them as necessary. */
5187     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5188     mat->free_a  = PETSC_TRUE;
5189     mat->free_ij = PETSC_TRUE;
5190     mat->nonew   = 0;
5191   } else if (scall == MAT_REUSE_MATRIX) {
5192     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5193     ci = mat->i; cj = mat->j; cam = mat->a;
5194     for (i=0; i<am; i++) {
5195       /* off-diagonal portion of A */
5196       ncols_o = bi[i+1] - bi[i];
5197       for (jo=0; jo<ncols_o; jo++) {
5198         col = cmap[*bj];
5199         if (col >= cstart) break;
5200         *cam++ = *ba++; bj++;
5201       }
5202       /* diagonal portion of A */
5203       ncols_d = ai[i+1] - ai[i];
5204       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5205       /* off-diagonal portion of A */
5206       for (j=jo; j<ncols_o; j++) {
5207         *cam++ = *ba++; bj++;
5208       }
5209     }
5210   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5211   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5212   PetscFunctionReturn(0);
5213 }
5214 
5215 #undef __FUNCT__
5216 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5217 /*@C
5218      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5219 
5220     Not Collective
5221 
5222    Input Parameters:
5223 +    A - the matrix
5224 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5225 -    row, col - index sets of rows and columns to extract (or NULL)
5226 
5227    Output Parameter:
5228 .    A_loc - the local sequential matrix generated
5229 
5230     Level: developer
5231 
5232 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5233 
5234 @*/
5235 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5236 {
5237   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5238   PetscErrorCode ierr;
5239   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5240   IS             isrowa,iscola;
5241   Mat            *aloc;
5242   PetscBool      match;
5243 
5244   PetscFunctionBegin;
5245   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5246   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5247   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5248   if (!row) {
5249     start = A->rmap->rstart; end = A->rmap->rend;
5250     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5251   } else {
5252     isrowa = *row;
5253   }
5254   if (!col) {
5255     start = A->cmap->rstart;
5256     cmap  = a->garray;
5257     nzA   = a->A->cmap->n;
5258     nzB   = a->B->cmap->n;
5259     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5260     ncols = 0;
5261     for (i=0; i<nzB; i++) {
5262       if (cmap[i] < start) idx[ncols++] = cmap[i];
5263       else break;
5264     }
5265     imark = i;
5266     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5267     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5268     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5269   } else {
5270     iscola = *col;
5271   }
5272   if (scall != MAT_INITIAL_MATRIX) {
5273     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5274     aloc[0] = *A_loc;
5275   }
5276   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5277   *A_loc = aloc[0];
5278   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5279   if (!row) {
5280     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5281   }
5282   if (!col) {
5283     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5284   }
5285   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5286   PetscFunctionReturn(0);
5287 }
5288 
5289 #undef __FUNCT__
5290 #define __FUNCT__ "MatGetBrowsOfAcols"
5291 /*@C
5292     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5293 
5294     Collective on Mat
5295 
5296    Input Parameters:
5297 +    A,B - the matrices in mpiaij format
5298 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5299 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5300 
5301    Output Parameter:
5302 +    rowb, colb - index sets of rows and columns of B to extract
5303 -    B_seq - the sequential matrix generated
5304 
5305     Level: developer
5306 
5307 @*/
5308 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5309 {
5310   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5311   PetscErrorCode ierr;
5312   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5313   IS             isrowb,iscolb;
5314   Mat            *bseq=NULL;
5315 
5316   PetscFunctionBegin;
5317   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5318     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5319   }
5320   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5321 
5322   if (scall == MAT_INITIAL_MATRIX) {
5323     start = A->cmap->rstart;
5324     cmap  = a->garray;
5325     nzA   = a->A->cmap->n;
5326     nzB   = a->B->cmap->n;
5327     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5328     ncols = 0;
5329     for (i=0; i<nzB; i++) {  /* row < local row index */
5330       if (cmap[i] < start) idx[ncols++] = cmap[i];
5331       else break;
5332     }
5333     imark = i;
5334     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5335     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5336     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5337     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5338   } else {
5339     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5340     isrowb  = *rowb; iscolb = *colb;
5341     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5342     bseq[0] = *B_seq;
5343   }
5344   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5345   *B_seq = bseq[0];
5346   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5347   if (!rowb) {
5348     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5349   } else {
5350     *rowb = isrowb;
5351   }
5352   if (!colb) {
5353     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5354   } else {
5355     *colb = iscolb;
5356   }
5357   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5358   PetscFunctionReturn(0);
5359 }
5360 
5361 #undef __FUNCT__
5362 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5363 /*
5364     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5365     of the OFF-DIAGONAL portion of local A
5366 
5367     Collective on Mat
5368 
5369    Input Parameters:
5370 +    A,B - the matrices in mpiaij format
5371 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5372 
5373    Output Parameter:
5374 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5375 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5376 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5377 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5378 
5379     Level: developer
5380 
5381 */
5382 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5383 {
5384   VecScatter_MPI_General *gen_to,*gen_from;
5385   PetscErrorCode         ierr;
5386   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5387   Mat_SeqAIJ             *b_oth;
5388   VecScatter             ctx =a->Mvctx;
5389   MPI_Comm               comm;
5390   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5391   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5392   PetscScalar            *rvalues,*svalues;
5393   MatScalar              *b_otha,*bufa,*bufA;
5394   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5395   MPI_Request            *rwaits = NULL,*swaits = NULL;
5396   MPI_Status             *sstatus,rstatus;
5397   PetscMPIInt            jj;
5398   PetscInt               *cols,sbs,rbs;
5399   PetscScalar            *vals;
5400 
5401   PetscFunctionBegin;
5402   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5403   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5404     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5405   }
5406   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5407   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5408 
5409   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5410   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5411   rvalues  = gen_from->values; /* holds the length of receiving row */
5412   svalues  = gen_to->values;   /* holds the length of sending row */
5413   nrecvs   = gen_from->n;
5414   nsends   = gen_to->n;
5415 
5416   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5417   srow    = gen_to->indices;    /* local row index to be sent */
5418   sstarts = gen_to->starts;
5419   sprocs  = gen_to->procs;
5420   sstatus = gen_to->sstatus;
5421   sbs     = gen_to->bs;
5422   rstarts = gen_from->starts;
5423   rprocs  = gen_from->procs;
5424   rbs     = gen_from->bs;
5425 
5426   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5427   if (scall == MAT_INITIAL_MATRIX) {
5428     /* i-array */
5429     /*---------*/
5430     /*  post receives */
5431     for (i=0; i<nrecvs; i++) {
5432       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5433       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5434       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5435     }
5436 
5437     /* pack the outgoing message */
5438     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5439 
5440     sstartsj[0] = 0;
5441     rstartsj[0] = 0;
5442     len         = 0; /* total length of j or a array to be sent */
5443     k           = 0;
5444     for (i=0; i<nsends; i++) {
5445       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5446       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5447       for (j=0; j<nrows; j++) {
5448         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5449         for (l=0; l<sbs; l++) {
5450           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5451 
5452           rowlen[j*sbs+l] = ncols;
5453 
5454           len += ncols;
5455           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5456         }
5457         k++;
5458       }
5459       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5460 
5461       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5462     }
5463     /* recvs and sends of i-array are completed */
5464     i = nrecvs;
5465     while (i--) {
5466       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5467     }
5468     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5469 
5470     /* allocate buffers for sending j and a arrays */
5471     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5472     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5473 
5474     /* create i-array of B_oth */
5475     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5476 
5477     b_othi[0] = 0;
5478     len       = 0; /* total length of j or a array to be received */
5479     k         = 0;
5480     for (i=0; i<nrecvs; i++) {
5481       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5482       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5483       for (j=0; j<nrows; j++) {
5484         b_othi[k+1] = b_othi[k] + rowlen[j];
5485         len        += rowlen[j]; k++;
5486       }
5487       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5488     }
5489 
5490     /* allocate space for j and a arrrays of B_oth */
5491     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5492     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5493 
5494     /* j-array */
5495     /*---------*/
5496     /*  post receives of j-array */
5497     for (i=0; i<nrecvs; i++) {
5498       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5499       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5500     }
5501 
5502     /* pack the outgoing message j-array */
5503     k = 0;
5504     for (i=0; i<nsends; i++) {
5505       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5506       bufJ  = bufj+sstartsj[i];
5507       for (j=0; j<nrows; j++) {
5508         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5509         for (ll=0; ll<sbs; ll++) {
5510           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5511           for (l=0; l<ncols; l++) {
5512             *bufJ++ = cols[l];
5513           }
5514           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5515         }
5516       }
5517       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5518     }
5519 
5520     /* recvs and sends of j-array are completed */
5521     i = nrecvs;
5522     while (i--) {
5523       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5524     }
5525     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5526   } else if (scall == MAT_REUSE_MATRIX) {
5527     sstartsj = *startsj_s;
5528     rstartsj = *startsj_r;
5529     bufa     = *bufa_ptr;
5530     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5531     b_otha   = b_oth->a;
5532   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5533 
5534   /* a-array */
5535   /*---------*/
5536   /*  post receives of a-array */
5537   for (i=0; i<nrecvs; i++) {
5538     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5539     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5540   }
5541 
5542   /* pack the outgoing message a-array */
5543   k = 0;
5544   for (i=0; i<nsends; i++) {
5545     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5546     bufA  = bufa+sstartsj[i];
5547     for (j=0; j<nrows; j++) {
5548       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5549       for (ll=0; ll<sbs; ll++) {
5550         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5551         for (l=0; l<ncols; l++) {
5552           *bufA++ = vals[l];
5553         }
5554         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5555       }
5556     }
5557     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5558   }
5559   /* recvs and sends of a-array are completed */
5560   i = nrecvs;
5561   while (i--) {
5562     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5563   }
5564   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5565   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5566 
5567   if (scall == MAT_INITIAL_MATRIX) {
5568     /* put together the new matrix */
5569     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5570 
5571     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5572     /* Since these are PETSc arrays, change flags to free them as necessary. */
5573     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5574     b_oth->free_a  = PETSC_TRUE;
5575     b_oth->free_ij = PETSC_TRUE;
5576     b_oth->nonew   = 0;
5577 
5578     ierr = PetscFree(bufj);CHKERRQ(ierr);
5579     if (!startsj_s || !bufa_ptr) {
5580       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5581       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5582     } else {
5583       *startsj_s = sstartsj;
5584       *startsj_r = rstartsj;
5585       *bufa_ptr  = bufa;
5586     }
5587   }
5588   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5589   PetscFunctionReturn(0);
5590 }
5591 
5592 #undef __FUNCT__
5593 #define __FUNCT__ "MatGetCommunicationStructs"
5594 /*@C
5595   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5596 
5597   Not Collective
5598 
5599   Input Parameters:
5600 . A - The matrix in mpiaij format
5601 
5602   Output Parameter:
5603 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5604 . colmap - A map from global column index to local index into lvec
5605 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5606 
5607   Level: developer
5608 
5609 @*/
5610 #if defined(PETSC_USE_CTABLE)
5611 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5612 #else
5613 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5614 #endif
5615 {
5616   Mat_MPIAIJ *a;
5617 
5618   PetscFunctionBegin;
5619   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5620   PetscValidPointer(lvec, 2);
5621   PetscValidPointer(colmap, 3);
5622   PetscValidPointer(multScatter, 4);
5623   a = (Mat_MPIAIJ*) A->data;
5624   if (lvec) *lvec = a->lvec;
5625   if (colmap) *colmap = a->colmap;
5626   if (multScatter) *multScatter = a->Mvctx;
5627   PetscFunctionReturn(0);
5628 }
5629 
5630 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5631 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5632 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5633 
5634 #undef __FUNCT__
5635 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5636 /*
5637     Computes (B'*A')' since computing B*A directly is untenable
5638 
5639                n                       p                          p
5640         (              )       (              )         (                  )
5641       m (      A       )  *  n (       B      )   =   m (         C        )
5642         (              )       (              )         (                  )
5643 
5644 */
5645 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5646 {
5647   PetscErrorCode ierr;
5648   Mat            At,Bt,Ct;
5649 
5650   PetscFunctionBegin;
5651   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5652   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5653   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5654   ierr = MatDestroy(&At);CHKERRQ(ierr);
5655   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5656   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5657   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5658   PetscFunctionReturn(0);
5659 }
5660 
5661 #undef __FUNCT__
5662 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5663 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5664 {
5665   PetscErrorCode ierr;
5666   PetscInt       m=A->rmap->n,n=B->cmap->n;
5667   Mat            Cmat;
5668 
5669   PetscFunctionBegin;
5670   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5671   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5672   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5673   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5674   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5675   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5676   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5677   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5678 
5679   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5680 
5681   *C = Cmat;
5682   PetscFunctionReturn(0);
5683 }
5684 
5685 /* ----------------------------------------------------------------*/
5686 #undef __FUNCT__
5687 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5688 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5689 {
5690   PetscErrorCode ierr;
5691 
5692   PetscFunctionBegin;
5693   if (scall == MAT_INITIAL_MATRIX) {
5694     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5695     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5696     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5697   }
5698   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5699   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5700   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5701   PetscFunctionReturn(0);
5702 }
5703 
5704 #if defined(PETSC_HAVE_MUMPS)
5705 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5706 #endif
5707 #if defined(PETSC_HAVE_PASTIX)
5708 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5709 #endif
5710 #if defined(PETSC_HAVE_SUPERLU_DIST)
5711 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5712 #endif
5713 #if defined(PETSC_HAVE_CLIQUE)
5714 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5715 #endif
5716 
5717 /*MC
5718    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5719 
5720    Options Database Keys:
5721 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5722 
5723   Level: beginner
5724 
5725 .seealso: MatCreateAIJ()
5726 M*/
5727 
5728 #undef __FUNCT__
5729 #define __FUNCT__ "MatCreate_MPIAIJ"
5730 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5731 {
5732   Mat_MPIAIJ     *b;
5733   PetscErrorCode ierr;
5734   PetscMPIInt    size;
5735 
5736   PetscFunctionBegin;
5737   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5738 
5739   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5740   B->data       = (void*)b;
5741   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5742   B->assembled  = PETSC_FALSE;
5743   B->insertmode = NOT_SET_VALUES;
5744   b->size       = size;
5745 
5746   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5747 
5748   /* build cache for off array entries formed */
5749   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5750 
5751   b->donotstash  = PETSC_FALSE;
5752   b->colmap      = 0;
5753   b->garray      = 0;
5754   b->roworiented = PETSC_TRUE;
5755 
5756   /* stuff used for matrix vector multiply */
5757   b->lvec  = NULL;
5758   b->Mvctx = NULL;
5759 
5760   /* stuff for MatGetRow() */
5761   b->rowindices   = 0;
5762   b->rowvalues    = 0;
5763   b->getrowactive = PETSC_FALSE;
5764 
5765   /* flexible pointer used in CUSP/CUSPARSE classes */
5766   b->spptr = NULL;
5767 
5768 #if defined(PETSC_HAVE_MUMPS)
5769   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5770 #endif
5771 #if defined(PETSC_HAVE_PASTIX)
5772   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5773 #endif
5774 #if defined(PETSC_HAVE_SUPERLU_DIST)
5775   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5776 #endif
5777 #if defined(PETSC_HAVE_CLIQUE)
5778   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5779 #endif
5780   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5781   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5782   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5783   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5784   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5785   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5786   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5787   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5788   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5789   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5790   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5791   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5792   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5793   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5794   PetscFunctionReturn(0);
5795 }
5796 
5797 #undef __FUNCT__
5798 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5799 /*@
5800      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5801          and "off-diagonal" part of the matrix in CSR format.
5802 
5803    Collective on MPI_Comm
5804 
5805    Input Parameters:
5806 +  comm - MPI communicator
5807 .  m - number of local rows (Cannot be PETSC_DECIDE)
5808 .  n - This value should be the same as the local size used in creating the
5809        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5810        calculated if N is given) For square matrices n is almost always m.
5811 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5812 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5813 .   i - row indices for "diagonal" portion of matrix
5814 .   j - column indices
5815 .   a - matrix values
5816 .   oi - row indices for "off-diagonal" portion of matrix
5817 .   oj - column indices
5818 -   oa - matrix values
5819 
5820    Output Parameter:
5821 .   mat - the matrix
5822 
5823    Level: advanced
5824 
5825    Notes:
5826        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5827        must free the arrays once the matrix has been destroyed and not before.
5828 
5829        The i and j indices are 0 based
5830 
5831        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5832 
5833        This sets local rows and cannot be used to set off-processor values.
5834 
5835        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5836        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5837        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5838        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5839        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5840        communication if it is known that only local entries will be set.
5841 
5842 .keywords: matrix, aij, compressed row, sparse, parallel
5843 
5844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5845           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5846 @*/
5847 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5848 {
5849   PetscErrorCode ierr;
5850   Mat_MPIAIJ     *maij;
5851 
5852   PetscFunctionBegin;
5853   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5854   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5855   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5856   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5857   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5858   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5859   maij = (Mat_MPIAIJ*) (*mat)->data;
5860 
5861   (*mat)->preallocated = PETSC_TRUE;
5862 
5863   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5864   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5865 
5866   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5867   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5868 
5869   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5870   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5871   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5872   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5873 
5874   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5875   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5876   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5877   PetscFunctionReturn(0);
5878 }
5879 
5880 /*
5881     Special version for direct calls from Fortran
5882 */
5883 #include <petsc-private/fortranimpl.h>
5884 
5885 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5886 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5887 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5888 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5889 #endif
5890 
5891 /* Change these macros so can be used in void function */
5892 #undef CHKERRQ
5893 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5894 #undef SETERRQ2
5895 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5896 #undef SETERRQ3
5897 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5898 #undef SETERRQ
5899 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5900 
5901 #undef __FUNCT__
5902 #define __FUNCT__ "matsetvaluesmpiaij_"
5903 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5904 {
5905   Mat            mat  = *mmat;
5906   PetscInt       m    = *mm, n = *mn;
5907   InsertMode     addv = *maddv;
5908   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5909   PetscScalar    value;
5910   PetscErrorCode ierr;
5911 
5912   MatCheckPreallocated(mat,1);
5913   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5914 
5915 #if defined(PETSC_USE_DEBUG)
5916   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5917 #endif
5918   {
5919     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5920     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5921     PetscBool roworiented = aij->roworiented;
5922 
5923     /* Some Variables required in the macro */
5924     Mat        A                 = aij->A;
5925     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5926     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5927     MatScalar  *aa               = a->a;
5928     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5929     Mat        B                 = aij->B;
5930     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5931     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5932     MatScalar  *ba               = b->a;
5933 
5934     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5935     PetscInt  nonew = a->nonew;
5936     MatScalar *ap1,*ap2;
5937 
5938     PetscFunctionBegin;
5939     for (i=0; i<m; i++) {
5940       if (im[i] < 0) continue;
5941 #if defined(PETSC_USE_DEBUG)
5942       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5943 #endif
5944       if (im[i] >= rstart && im[i] < rend) {
5945         row      = im[i] - rstart;
5946         lastcol1 = -1;
5947         rp1      = aj + ai[row];
5948         ap1      = aa + ai[row];
5949         rmax1    = aimax[row];
5950         nrow1    = ailen[row];
5951         low1     = 0;
5952         high1    = nrow1;
5953         lastcol2 = -1;
5954         rp2      = bj + bi[row];
5955         ap2      = ba + bi[row];
5956         rmax2    = bimax[row];
5957         nrow2    = bilen[row];
5958         low2     = 0;
5959         high2    = nrow2;
5960 
5961         for (j=0; j<n; j++) {
5962           if (roworiented) value = v[i*n+j];
5963           else value = v[i+j*m];
5964           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5965           if (in[j] >= cstart && in[j] < cend) {
5966             col = in[j] - cstart;
5967             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5968           } else if (in[j] < 0) continue;
5969 #if defined(PETSC_USE_DEBUG)
5970           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5971 #endif
5972           else {
5973             if (mat->was_assembled) {
5974               if (!aij->colmap) {
5975                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5976               }
5977 #if defined(PETSC_USE_CTABLE)
5978               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5979               col--;
5980 #else
5981               col = aij->colmap[in[j]] - 1;
5982 #endif
5983               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5984                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5985                 col  =  in[j];
5986                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5987                 B     = aij->B;
5988                 b     = (Mat_SeqAIJ*)B->data;
5989                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5990                 rp2   = bj + bi[row];
5991                 ap2   = ba + bi[row];
5992                 rmax2 = bimax[row];
5993                 nrow2 = bilen[row];
5994                 low2  = 0;
5995                 high2 = nrow2;
5996                 bm    = aij->B->rmap->n;
5997                 ba    = b->a;
5998               }
5999             } else col = in[j];
6000             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6001           }
6002         }
6003       } else if (!aij->donotstash) {
6004         if (roworiented) {
6005           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6006         } else {
6007           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6008         }
6009       }
6010     }
6011   }
6012   PetscFunctionReturnVoid();
6013 }
6014 
6015