xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d3b23db5a7ee1f155504da2761e4a9fbf8c1ff2c)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ format also automatically switches over to
20    using inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
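
/*
   Example (editorial sketch, not part of the original source): a typical way to create an AIJ matrix
   that works for both one and many processes is to call both preallocation routines, as recommended
   above.  The sizes and nonzero counts used here are illustrative assumptions only.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         ignored unless the type resolves to seqaij
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  ignored unless the type resolves to mpiaij
     ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/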
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
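
/*
   Example (editorial sketch): the aijcrl format is usually selected at runtime rather than hard-coded;
   assuming the application calls MatSetFromOptions() on the matrix,

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   running the program with -mat_type aijcrl then switches the matrix to this format.
*/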
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine the number of diagonal and off-diagonal nonzeros per row */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine the number of diagonal and off-diagonal nonzeros per row */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
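
/*
   Example (editorial sketch): a caller, such as a preconditioner, that holds the full sequential
   matrix gseq on process 0 could distribute it as below; gseq, Adist and the local row count m are
   assumptions of this illustration, not names used elsewhere in this file.

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);   on reuse only the numerical values are moved
*/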
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it, it is not scalable (each processor
356 has an order N integer array) but access is fast.
357 */
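/*
   Concrete illustration (editorial, values are hypothetical): if garray = {3,7,12}, i.e. the local
   off-diagonal block B has three columns corresponding to global columns 3, 7 and 12, then the colmap
   built below answers "global column 7 -> local column 1".  With PETSC_USE_CTABLE the lookup is
   PetscTableFind(aij->colmap,7+1,&col); col--;  without it, simply col = aij->colmap[7] - 1;
   (the +1/-1 shift keeps 0 available to mean "column not present locally").
*/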
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
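/*
   Editorial note on the two macros below: each inserts (or adds) a single value into one row of a
   sequential AIJ block, MatSetValues_SeqAIJ_A_Private() for the diagonal block A and
   MatSetValues_SeqAIJ_B_Private() for the off-diagonal block B.  They first narrow the search range
   with a short binary search (while high-low > 5), then scan linearly; if the column is not already
   present and new nonzeros are allowed, the row is reallocated if necessary and later entries are
   shifted up to make room for the new entry.
*/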
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal, we use separate
871      code so that MatSetValues() is not called for each diagonal entry;
872      that would allocate new memory, triggering many mallocs and slowing things down.
873   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
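
/*
   The binary format written above is: a 4-entry header (MAT_FILE_CLASSID, global
   rows, global columns, global nonzero count), then every row length in rank order,
   then the global column indices of each row in increasing order, then the matching
   values.  Below is a minimal write/read sketch (not part of this source); the file
   name and the helper name are illustrative assumptions.
*/
static PetscErrorCode SketchBinaryViewLoad(Mat A)
{
  PetscErrorCode ierr;
  MPI_Comm       comm = PetscObjectComm((PetscObject)A);
  PetscViewer    viewer;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);      /* dispatches to MatView_MPIAIJ_Binary() on more than one process */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
  ierr = MatLoad(B,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}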
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
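
/*
   A minimal viewing sketch (not part of this source): the same MatView() call is
   routed above to the ASCII, draw, socket, or binary handling depending on the
   viewer; the helper name is an illustrative assumption.
*/
static PetscErrorCode SketchViewInfo(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A));

  PetscFunctionBegin;
  ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}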
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
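
/*
   Each branch above follows the same pattern: scatter the current ghost values of
   xx, form the modified right-hand side bb1 = bb - B*x_ghost, and relax only with
   the on-process diagonal block A.  A minimal calling sketch (not part of this
   source; A, b, x and the helper name are illustrative assumptions):
*/
static PetscErrorCode SketchLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two iterations of one symmetric local sweep each, starting from x = 0 */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}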
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
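
/*
   A minimal usage sketch (not part of this source): permuting an MPIAIJ matrix with
   row and column index sets.  The identity permutation built with ISCreateStride()
   and the helper name are illustrative assumptions; in practice rowp/colp come from
   an ordering routine.
*/
static PetscErrorCode SketchPermute(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             rowp,colp;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}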
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
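
/*
   A minimal usage sketch (not part of this source): summing the per-process counts
   assembled above over the whole communicator.  MatInfo fields are PetscLogDoubles,
   hence the casts; the helper name is an illustrative assumption.
*/
static PetscErrorCode SketchGetInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g allocated %g mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}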
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
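
/*
   A minimal usage sketch (not part of this source): traversing the locally owned
   rows of an assembled MPIAIJ matrix.  MatGetRow() only serves local rows, and each
   call must be matched by MatRestoreRow() before the next row is requested; the
   helper name is an illustrative assumption.
*/
static PetscErrorCode SketchRowTraversal(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* cols[] holds global column indices in increasing order, vals[] the matching entries */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}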
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
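
/*
   The three supported norms above are the Frobenius norm sqrt(sum_ij |a_ij|^2), the
   1-norm (largest absolute column sum), and the infinity norm (largest absolute row
   sum); the 2-norm is not supported here.  A minimal calling sketch (not part of
   this source; the helper name is an illustrative assumption):
*/
static PetscErrorCode SketchNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrm1,nrmf,nrminf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}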
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
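
/*
   A minimal usage sketch (not part of this source): forming an explicit transpose
   once and refilling it after the values of A change while the nonzero structure
   stays fixed; the helper name is an illustrative assumption.
*/
static PetscErrorCode SketchTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  /* ... new values are inserted into A with the same nonzero pattern ... */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}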
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
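
/*
   A minimal usage sketch (not part of this source): MatDiagonalScale() replaces the
   matrix by diag(ll)*A*diag(rr), with ll conforming to the rows and rr to the
   columns; the helper name and the scaling factors are illustrative assumptions.
*/
static PetscErrorCode SketchDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
  ierr = VecSet(l,2.0);CHKERRQ(ierr);   /* scale every row by 2 */
  ierr = VecSet(r,0.5);CHKERRQ(ierr);   /* scale every column by 1/2 */
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}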
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        MatCopy() directly on the two parts. If need be, a copy more efficient than
2247        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2248        and then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
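
/*
   The loop above is a two-pointer merge that counts, per row, the size of the union
   of the column sets of X and Y in global numbering.  For example (illustrative
   values only), if a row of X has global columns {1,4,7} and the same row of Y has
   {2,4}, the merged count is 4: columns 1,2,4,7 with the shared column 4 counted once.
*/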
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
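
/*
   A minimal usage sketch (not part of this source): Y <- Y + a*X.  The MatStructure
   argument selects the paths above; DIFFERENT_NONZERO_PATTERN falls back to building
   a replacement matrix with merged preallocation.  The helper name and the scalar 2.0
   are illustrative assumptions.
*/
static PetscErrorCode SketchAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}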
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map.hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511   PetscInt    nzlocal,nsends,nrecvs;
2512   PetscMPIInt *send_rank,*recv_rank;
2513   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514   PetscScalar *sbuf_a,**rbuf_a;
2515   PetscSubcomm *psubcomm;
2516   PetscErrorCode (*Destroy)(Mat);
2517 } Mat_Redundant;
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2521 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2522 {
2523   PetscErrorCode ierr;
2524   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2525   PetscInt       i;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2530   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2531   for (i=0; i<redund->nrecvs; i++) {
2532     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2533     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2534   }
2535   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2536   ierr = PetscFree(redund);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatDestroy_MatRedundant"
2542 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2543 {
2544   PetscErrorCode ierr;
2545   PetscContainer container;
2546   Mat_Redundant  *redund=NULL;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2550   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2551   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2552 
2553   A->ops->destroy = redund->Destroy;
2554 
2555   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2556   if (A->ops->destroy) {
2557     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2565 {
2566   PetscMPIInt    rank,size;
2567   MPI_Comm       comm,subcomm=psubcomm->comm;
2568   PetscErrorCode ierr;
2569   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2570   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2571   PetscInt       *rowrange = mat->rmap->range;
2572   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2573   Mat            A = aij->A,B=aij->B,C=*matredundant;
2574   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2575   PetscScalar    *sbuf_a;
2576   PetscInt       nzlocal=a->nz+b->nz;
2577   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2578   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2579   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2580   MatScalar      *aworkA,*aworkB;
2581   PetscScalar    *vals;
2582   PetscMPIInt    tag1,tag2,tag3,imdex;
2583   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2584   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2585   MPI_Status     recv_status,*send_status;
2586   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2587   PetscInt       **rbuf_j=NULL;
2588   PetscScalar    **rbuf_a=NULL;
2589   Mat_Redundant  *redund =NULL;
2590   PetscContainer container;
2591   PetscInt       rstart_sub,rend_sub,mloc_sub;
2592 
2593   PetscFunctionBegin;
2594   if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type");
2595   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
2596   if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N);
2597 
2598   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2601 
2602   /* printf("[%d] MatGetRedundantMatrix_MPIAIJ_interlaced...\n",rank); */
2603 
2604   if (reuse == MAT_REUSE_MATRIX) {
2605     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2606     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2607     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2608     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2609     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2610 
2611     nsends    = redund->nsends;
2612     nrecvs    = redund->nrecvs;
2613     send_rank = redund->send_rank;
2614     recv_rank = redund->recv_rank;
2615     sbuf_nz   = redund->sbuf_nz;
2616     rbuf_nz   = redund->rbuf_nz;
2617     sbuf_j    = redund->sbuf_j;
2618     sbuf_a    = redund->sbuf_a;
2619     rbuf_j    = redund->rbuf_j;
2620     rbuf_a    = redund->rbuf_a;
2621   }
2622 
2623   if (reuse == MAT_INITIAL_MATRIX) {
2624     PetscMPIInt subrank,subsize;
2625     PetscInt    nleftover,np_subcomm;
2626 
2627     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2628     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2629 
2630     /* get local size of redundant matrix */
2631     const PetscInt *range;
2632     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2633     rstart_sub = range[nsubcomm*subrank];
2634     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2635       rend_sub = range[nsubcomm*(subrank+1)];
2636     } else {
2637       rend_sub = mat->rmap->N;
2638     }
2639 
2640     mloc_sub = rend_sub - rstart_sub;
2641     /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */
2642 
2643     /* get the destination processors' id send_rank, nsends and nrecvs */
2644     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2645 
2646     np_subcomm = size/nsubcomm;
2647     nleftover  = size - nsubcomm*np_subcomm;
2648 
2649     nsends = 0; nrecvs = 0;
2650     for (i=0; i<size; i++) {
2651       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2652         send_rank[nsends]   = i; nsends++;
2653         recv_rank[nrecvs++] = i;
2654       }
2655     }
2656     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2657       i = size-nleftover-1;
2658       j = 0;
2659       while (j < nsubcomm - nleftover) {
2660         send_rank[nsends++] = i;
2661         i--; j++;
2662       }
2663     }
2664 
2665     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2666       for (i=0; i<nleftover; i++) {
2667         recv_rank[nrecvs++] = size-nleftover+i;
2668       }
2669     }
2670 
2671     /* allocate sbuf_j, sbuf_a */
2672     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2673     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2674     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2675   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2676 
2677   /* copy mat's local entries into the buffers */
2678   if (reuse == MAT_INITIAL_MATRIX) {
2679     rownz_max = 0;
2680     rptr      = sbuf_j;
2681     cols      = sbuf_j + rend-rstart + 1;
2682     vals      = sbuf_a;
2683     rptr[0]   = 0;
2684     for (i=0; i<rend-rstart; i++) {
2685       row    = i + rstart;
2686       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2687       ncols  = nzA + nzB;
2688       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2689       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2690       /* load the column indices for this row into cols */
2691       lwrite = 0;
2692       for (l=0; l<nzB; l++) {
2693         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2694           vals[lwrite]   = aworkB[l];
2695           cols[lwrite++] = ctmp;
2696         }
2697       }
2698       for (l=0; l<nzA; l++) {
2699         vals[lwrite]   = aworkA[l];
2700         cols[lwrite++] = cstart + cworkA[l];
2701       }
2702       for (l=0; l<nzB; l++) {
2703         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2704           vals[lwrite]   = aworkB[l];
2705           cols[lwrite++] = ctmp;
2706         }
2707       }
2708       vals     += ncols;
2709       cols     += ncols;
2710       rptr[i+1] = rptr[i] + ncols;
2711       if (rownz_max < ncols) rownz_max = ncols;
2712     }
2713     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2714   } else { /* only copy matrix values into sbuf_a */
2715     rptr    = sbuf_j;
2716     vals    = sbuf_a;
2717     rptr[0] = 0;
2718     for (i=0; i<rend-rstart; i++) {
2719       row    = i + rstart;
2720       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2721       ncols  = nzA + nzB;
2722       cworkB = b->j + b->i[i];
2723       aworkA = a->a + a->i[i];
2724       aworkB = b->a + b->i[i];
2725       lwrite = 0;
2726       for (l=0; l<nzB; l++) {
2727         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2728       }
2729       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2730       for (l=0; l<nzB; l++) {
2731         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2732       }
2733       vals     += ncols;
2734       rptr[i+1] = rptr[i] + ncols;
2735     }
2736   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2737 
2738   /* send nzlocal to others, and recv other's nzlocal */
2739   /*--------------------------------------------------*/
2740   if (reuse == MAT_INITIAL_MATRIX) {
2741     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2742 
2743     s_waits2 = s_waits3 + nsends;
2744     s_waits1 = s_waits2 + nsends;
2745     r_waits1 = s_waits1 + nsends;
2746     r_waits2 = r_waits1 + nrecvs;
2747     r_waits3 = r_waits2 + nrecvs;
2748   } else {
2749     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2750 
2751     r_waits3 = s_waits3 + nsends;
2752   }
2753 
2754   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2755   if (reuse == MAT_INITIAL_MATRIX) {
2756     /* get new tags to keep the communication clean */
2757     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2758     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2759     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2760 
2761     /* post receives of other's nzlocal */
2762     for (i=0; i<nrecvs; i++) {
2763       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2764     }
2765     /* send nzlocal to others */
2766     for (i=0; i<nsends; i++) {
2767       sbuf_nz[i] = nzlocal;
2768       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2769     }
2770     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2771     count = nrecvs;
2772     while (count) {
2773       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2774 
2775       recv_rank[imdex] = recv_status.MPI_SOURCE;
2776       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2777       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2778 
2779       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2780 
2781       rbuf_nz[imdex] += i + 2;
2782 
2783       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2784       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2785       count--;
2786     }
2787     /* wait on sends of nzlocal */
2788     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2789     /* send mat->i,j to others, and recv from other's */
2790     /*------------------------------------------------*/
2791     for (i=0; i<nsends; i++) {
2792       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2793       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2794     }
2795     /* wait on receives of mat->i,j */
2796     /*------------------------------*/
2797     count = nrecvs;
2798     while (count) {
2799       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2800       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2801       count--;
2802     }
2803     /* wait on sends of mat->i,j */
2804     /*---------------------------*/
2805     if (nsends) {
2806       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2807     }
2808   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2809 
2810   /* post receives, send and receive mat->a */
2811   /*----------------------------------------*/
2812   for (imdex=0; imdex<nrecvs; imdex++) {
2813     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2814   }
2815   for (i=0; i<nsends; i++) {
2816     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2817   }
2818   count = nrecvs;
2819   while (count) {
2820     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2821     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2822     count--;
2823   }
2824   if (nsends) {
2825     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2826   }
2827 
2828   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2829 
2830   /* create redundant matrix */
2831   /*-------------------------*/
2832   if (reuse == MAT_INITIAL_MATRIX) {
2833     /* compute rownz_max for preallocation */
2834     for (imdex=0; imdex<nrecvs; imdex++) {
2835       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2836       rptr = rbuf_j[imdex];
2837       for (i=0; i<j; i++) {
2838         ncols = rptr[i+1] - rptr[i];
2839         if (rownz_max < ncols) rownz_max = ncols;
2840       }
2841     }
2842 
2843     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2844     ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2845     /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- gives an incorrect local column size! */
2846     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2847     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2848     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2849     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2850   } else {
2851     C = *matredundant;
2852   }
2853 
2854   /* insert local matrix entries */
2855   rptr = sbuf_j;
2856   cols = sbuf_j + rend-rstart + 1;
2857   vals = sbuf_a;
2858   for (i=0; i<rend-rstart; i++) {
2859     row   = i + rstart;
2860     ncols = rptr[i+1] - rptr[i];
2861     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2862     vals += ncols;
2863     cols += ncols;
2864   }
2865   /* insert received matrix entries */
2866   for (imdex=0; imdex<nrecvs; imdex++) {
2867     rstart = rowrange[recv_rank[imdex]];
2868     rend   = rowrange[recv_rank[imdex]+1];
2869     rptr   = rbuf_j[imdex];
2870     cols   = rbuf_j[imdex] + rend-rstart + 1;
2871     vals   = rbuf_a[imdex];
2872     for (i=0; i<rend-rstart; i++) {
2873       row   = i + rstart;
2874       ncols = rptr[i+1] - rptr[i];
2875       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2876       vals += ncols;
2877       cols += ncols;
2878     }
2879   }
2880   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882 
2883   if (reuse == MAT_INITIAL_MATRIX) {
2884     PetscContainer container;
2885     *matredundant = C;
2886     /* create a supporting struct and attach it to C for reuse */
2887     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2888     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2889     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2890     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2891     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2892     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2893 
2894     redund->nzlocal   = nzlocal;
2895     redund->nsends    = nsends;
2896     redund->nrecvs    = nrecvs;
2897     redund->send_rank = send_rank;
2898     redund->recv_rank = recv_rank;
2899     redund->sbuf_nz   = sbuf_nz;
2900     redund->rbuf_nz   = rbuf_nz;
2901     redund->sbuf_j    = sbuf_j;
2902     redund->sbuf_a    = sbuf_a;
2903     redund->rbuf_j    = rbuf_j;
2904     redund->rbuf_a    = rbuf_a;
2905 
2906     redund->Destroy = C->ops->destroy;
2907     C->ops->destroy = MatDestroy_MatRedundant;
2908   }
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2914 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2915 {
2916   PetscErrorCode ierr;
2917   MPI_Comm       comm;
2918   PetscMPIInt    size;
2919 
2920   PetscFunctionBegin;
2921   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2922   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2923   /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */
2924   if (!subcomm || nsubcomm == size) { /* create psubcomm */
2925     PetscSubcomm psubcomm;
2926     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2927     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2928     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2929     ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2930     /* destroy psubcomm? */
2931   } else {
2932     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2933   }
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2939 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2940 {
2941   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2942   PetscErrorCode ierr;
2943   PetscInt       i,*idxb = 0;
2944   PetscScalar    *va,*vb;
2945   Vec            vtmp;
2946 
2947   PetscFunctionBegin;
2948   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2949   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2950   if (idx) {
2951     for (i=0; i<A->rmap->n; i++) {
2952       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2953     }
2954   }
2955 
2956   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2957   if (idx) {
2958     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2959   }
2960   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2961   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2962 
2963   for (i=0; i<A->rmap->n; i++) {
2964     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2965       va[i] = vb[i];
2966       if (idx) idx[i] = a->garray[idxb[i]];
2967     }
2968   }
2969 
2970   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2971   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2972   ierr = PetscFree(idxb);CHKERRQ(ierr);
2973   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2974   PetscFunctionReturn(0);
2975 }
2976 
2977 #undef __FUNCT__
2978 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2979 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2980 {
2981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2982   PetscErrorCode ierr;
2983   PetscInt       i,*idxb = 0;
2984   PetscScalar    *va,*vb;
2985   Vec            vtmp;
2986 
2987   PetscFunctionBegin;
2988   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2989   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2990   if (idx) {
2991     for (i=0; i<A->rmap->n; i++) {
2992       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2993     }
2994   }
2995 
2996   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2997   if (idx) {
2998     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2999   }
3000   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3001   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3002 
3003   for (i=0; i<A->rmap->n; i++) {
3004     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3005       va[i] = vb[i];
3006       if (idx) idx[i] = a->garray[idxb[i]];
3007     }
3008   }
3009 
3010   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3011   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3012   ierr = PetscFree(idxb);CHKERRQ(ierr);
3013   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3019 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3020 {
3021   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3022   PetscInt       n      = A->rmap->n;
3023   PetscInt       cstart = A->cmap->rstart;
3024   PetscInt       *cmap  = mat->garray;
3025   PetscInt       *diagIdx, *offdiagIdx;
3026   Vec            diagV, offdiagV;
3027   PetscScalar    *a, *diagA, *offdiagA;
3028   PetscInt       r;
3029   PetscErrorCode ierr;
3030 
3031   PetscFunctionBegin;
3032   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3033   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3034   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3035   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3036   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3037   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3038   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3039   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3040   for (r = 0; r < n; ++r) {
3041     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3042       a[r]   = diagA[r];
3043       idx[r] = cstart + diagIdx[r];
3044     } else {
3045       a[r]   = offdiagA[r];
3046       idx[r] = cmap[offdiagIdx[r]];
3047     }
3048   }
3049   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3050   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3051   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3052   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3053   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3054   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 #undef __FUNCT__
3059 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3060 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3061 {
3062   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3063   PetscInt       n      = A->rmap->n;
3064   PetscInt       cstart = A->cmap->rstart;
3065   PetscInt       *cmap  = mat->garray;
3066   PetscInt       *diagIdx, *offdiagIdx;
3067   Vec            diagV, offdiagV;
3068   PetscScalar    *a, *diagA, *offdiagA;
3069   PetscInt       r;
3070   PetscErrorCode ierr;
3071 
3072   PetscFunctionBegin;
3073   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3074   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3075   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3076   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3077   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3078   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3079   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3080   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3081   for (r = 0; r < n; ++r) {
3082     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3083       a[r]   = diagA[r];
3084       idx[r] = cstart + diagIdx[r];
3085     } else {
3086       a[r]   = offdiagA[r];
3087       idx[r] = cmap[offdiagIdx[r]];
3088     }
3089   }
3090   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3091   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3092   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3093   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3094   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3095   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3101 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3102 {
3103   PetscErrorCode ierr;
3104   Mat            *dummy;
3105 
3106   PetscFunctionBegin;
3107   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3108   *newmat = *dummy;
3109   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3117 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3118 {
3119   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3120   PetscErrorCode ierr;
3121 
3122   PetscFunctionBegin;
3123   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3129 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3130 {
3131   PetscErrorCode ierr;
3132   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3133 
3134   PetscFunctionBegin;
3135   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3136   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3137   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3138   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 /* -------------------------------------------------------------------*/
3143 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3144                                        MatGetRow_MPIAIJ,
3145                                        MatRestoreRow_MPIAIJ,
3146                                        MatMult_MPIAIJ,
3147                                 /* 4*/ MatMultAdd_MPIAIJ,
3148                                        MatMultTranspose_MPIAIJ,
3149                                        MatMultTransposeAdd_MPIAIJ,
3150 #if defined(PETSC_HAVE_PBGL)
3151                                        MatSolve_MPIAIJ,
3152 #else
3153                                        0,
3154 #endif
3155                                        0,
3156                                        0,
3157                                 /*10*/ 0,
3158                                        0,
3159                                        0,
3160                                        MatSOR_MPIAIJ,
3161                                        MatTranspose_MPIAIJ,
3162                                 /*15*/ MatGetInfo_MPIAIJ,
3163                                        MatEqual_MPIAIJ,
3164                                        MatGetDiagonal_MPIAIJ,
3165                                        MatDiagonalScale_MPIAIJ,
3166                                        MatNorm_MPIAIJ,
3167                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3168                                        MatAssemblyEnd_MPIAIJ,
3169                                        MatSetOption_MPIAIJ,
3170                                        MatZeroEntries_MPIAIJ,
3171                                 /*24*/ MatZeroRows_MPIAIJ,
3172                                        0,
3173 #if defined(PETSC_HAVE_PBGL)
3174                                        0,
3175 #else
3176                                        0,
3177 #endif
3178                                        0,
3179                                        0,
3180                                 /*29*/ MatSetUp_MPIAIJ,
3181 #if defined(PETSC_HAVE_PBGL)
3182                                        0,
3183 #else
3184                                        0,
3185 #endif
3186                                        0,
3187                                        0,
3188                                        0,
3189                                 /*34*/ MatDuplicate_MPIAIJ,
3190                                        0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                 /*39*/ MatAXPY_MPIAIJ,
3195                                        MatGetSubMatrices_MPIAIJ,
3196                                        MatIncreaseOverlap_MPIAIJ,
3197                                        MatGetValues_MPIAIJ,
3198                                        MatCopy_MPIAIJ,
3199                                 /*44*/ MatGetRowMax_MPIAIJ,
3200                                        MatScale_MPIAIJ,
3201                                        0,
3202                                        0,
3203                                        MatZeroRowsColumns_MPIAIJ,
3204                                 /*49*/ MatSetRandom_MPIAIJ,
3205                                        0,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3210                                        0,
3211                                        MatSetUnfactored_MPIAIJ,
3212                                        MatPermute_MPIAIJ,
3213                                        0,
3214                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3215                                        MatDestroy_MPIAIJ,
3216                                        MatView_MPIAIJ,
3217                                        0,
3218                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3219                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3220                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3221                                        0,
3222                                        0,
3223                                        0,
3224                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3225                                        MatGetRowMinAbs_MPIAIJ,
3226                                        0,
3227                                        MatSetColoring_MPIAIJ,
3228                                        0,
3229                                        MatSetValuesAdifor_MPIAIJ,
3230                                 /*75*/ MatFDColoringApply_AIJ,
3231                                        0,
3232                                        0,
3233                                        0,
3234                                        MatFindZeroDiagonals_MPIAIJ,
3235                                 /*80*/ 0,
3236                                        0,
3237                                        0,
3238                                 /*83*/ MatLoad_MPIAIJ,
3239                                        0,
3240                                        0,
3241                                        0,
3242                                        0,
3243                                        0,
3244                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3245                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3246                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3247                                        MatPtAP_MPIAIJ_MPIAIJ,
3248                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3249                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3250                                        0,
3251                                        0,
3252                                        0,
3253                                        0,
3254                                 /*99*/ 0,
3255                                        0,
3256                                        0,
3257                                        MatConjugate_MPIAIJ,
3258                                        0,
3259                                 /*104*/MatSetValuesRow_MPIAIJ,
3260                                        MatRealPart_MPIAIJ,
3261                                        MatImaginaryPart_MPIAIJ,
3262                                        0,
3263                                        0,
3264                                 /*109*/0,
3265                                        MatGetRedundantMatrix_MPIAIJ,
3266                                        MatGetRowMin_MPIAIJ,
3267                                        0,
3268                                        0,
3269                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3270                                        0,
3271                                        0,
3272                                        0,
3273                                        0,
3274                                 /*119*/0,
3275                                        0,
3276                                        0,
3277                                        0,
3278                                        MatGetMultiProcBlock_MPIAIJ,
3279                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3280                                        MatGetColumnNorms_MPIAIJ,
3281                                        MatInvertBlockDiagonal_MPIAIJ,
3282                                        0,
3283                                        MatGetSubMatricesParallel_MPIAIJ,
3284                                 /*129*/0,
3285                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3286                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3287                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3288                                        0,
3289                                 /*134*/0,
3290                                        0,
3291                                        0,
3292                                        0,
3293                                        0,
3294                                 /*139*/0,
3295                                        0
3296 };
3297 
3298 /* ----------------------------------------------------------------------------------------*/
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3302 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3303 {
3304   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3305   PetscErrorCode ierr;
3306 
3307   PetscFunctionBegin;
3308   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3309   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3315 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3316 {
3317   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3318   PetscErrorCode ierr;
3319 
3320   PetscFunctionBegin;
3321   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3322   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3323   PetscFunctionReturn(0);
3324 }
3325 
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3328 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3329 {
3330   Mat_MPIAIJ     *b;
3331   PetscErrorCode ierr;
3332 
3333   PetscFunctionBegin;
3334   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3335   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3336   b = (Mat_MPIAIJ*)B->data;
3337 
3338   if (!B->preallocated) {
3339     /* Explicitly create 2 MATSEQAIJ matrices. */
3340     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3341     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3342     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3343     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3344     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3345     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3346     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3347     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3348     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3349     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3350   }
3351 
3352   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3353   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3354   B->preallocated = PETSC_TRUE;
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3361 {
3362   Mat            mat;
3363   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3364   PetscErrorCode ierr;
3365 
3366   PetscFunctionBegin;
3367   *newmat = 0;
3368   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3369   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3370   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3371   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3372   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3373   a       = (Mat_MPIAIJ*)mat->data;
3374 
3375   mat->factortype   = matin->factortype;
3376   mat->rmap->bs     = matin->rmap->bs;
3377   mat->cmap->bs     = matin->cmap->bs;
3378   mat->assembled    = PETSC_TRUE;
3379   mat->insertmode   = NOT_SET_VALUES;
3380   mat->preallocated = PETSC_TRUE;
3381 
3382   a->size         = oldmat->size;
3383   a->rank         = oldmat->rank;
3384   a->donotstash   = oldmat->donotstash;
3385   a->roworiented  = oldmat->roworiented;
3386   a->rowindices   = 0;
3387   a->rowvalues    = 0;
3388   a->getrowactive = PETSC_FALSE;
3389 
3390   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3391   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3392 
3393   if (oldmat->colmap) {
3394 #if defined(PETSC_USE_CTABLE)
3395     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3396 #else
3397     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3398     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3399     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3400 #endif
3401   } else a->colmap = 0;
3402   if (oldmat->garray) {
3403     PetscInt len;
3404     len  = oldmat->B->cmap->n;
3405     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3407     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3408   } else a->garray = 0;
3409 
3410   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3411   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3412   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3413   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3414   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3415   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3416   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3417   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3418   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3419   *newmat = mat;
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatLoad_MPIAIJ"
3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3428 {
3429   PetscScalar    *vals,*svals;
3430   MPI_Comm       comm;
3431   PetscErrorCode ierr;
3432   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3433   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3434   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3435   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3436   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3437   int            fd;
3438   PetscInt       bs = 1;
3439 
3440   PetscFunctionBegin;
3441   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3442   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3443   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3444   if (!rank) {
3445     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3446     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3447     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3448   }
3449 
3450   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3451   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3452   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3453 
3454   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3455 
3456   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3457   M    = header[1]; N = header[2];
3458   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3459   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3460   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3461 
3462   /* If global sizes are set, check if they are consistent with that given in the file */
3463   if (sizesset) {
3464     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3465   }
3466   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3467   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3468 
3469   /* determine ownership of all (block) rows */
3470   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3471   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3472   else m = newMat->rmap->n; /* Set by user */
3473 
3474   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3475   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3476 
3477   /* First process needs enough room for process with most rows */
3478   if (!rank) {
3479     mmax = rowners[1];
3480     for (i=2; i<=size; i++) {
3481       mmax = PetscMax(mmax, rowners[i]);
3482     }
3483   } else mmax = -1;             /* unused, but compilers complain */
3484 
3485   rowners[0] = 0;
3486   for (i=2; i<=size; i++) {
3487     rowners[i] += rowners[i-1];
3488   }
3489   rstart = rowners[rank];
3490   rend   = rowners[rank+1];
3491 
3492   /* distribute row lengths to all processors */
3493   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3494   if (!rank) {
3495     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3496     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3497     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3498     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3499     for (j=0; j<m; j++) {
3500       procsnz[0] += ourlens[j];
3501     }
3502     for (i=1; i<size; i++) {
3503       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3504       /* calculate the number of nonzeros on each processor */
3505       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3506         procsnz[i] += rowlengths[j];
3507       }
3508       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3509     }
3510     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3511   } else {
3512     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3513   }
3514 
3515   if (!rank) {
3516     /* determine max buffer needed and allocate it */
3517     maxnz = 0;
3518     for (i=0; i<size; i++) {
3519       maxnz = PetscMax(maxnz,procsnz[i]);
3520     }
3521     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3522 
3523     /* read in my part of the matrix column indices  */
3524     nz   = procsnz[0];
3525     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3526     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3527 
3528     /* read in everyone else's rows and ship them off */
3529     for (i=1; i<size; i++) {
3530       nz   = procsnz[i];
3531       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3532       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3533     }
3534     ierr = PetscFree(cols);CHKERRQ(ierr);
3535   } else {
3536     /* determine buffer space needed for message */
3537     nz = 0;
3538     for (i=0; i<m; i++) {
3539       nz += ourlens[i];
3540     }
3541     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3542 
3543     /* receive message of column indices*/
3544     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3545   }
3546 
3547   /* determine column ownership if matrix is not square */
3548   if (N != M) {
3549     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3550     else n = newMat->cmap->n;
3551     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3552     cstart = cend - n;
3553   } else {
3554     cstart = rstart;
3555     cend   = rend;
3556     n      = cend - cstart;
3557   }
3558 
3559   /* loop over local rows, determining number of off diagonal entries */
3560   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3561   jj   = 0;
3562   for (i=0; i<m; i++) {
3563     for (j=0; j<ourlens[i]; j++) {
3564       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3565       jj++;
3566     }
3567   }
3568 
3569   for (i=0; i<m; i++) {
3570     ourlens[i] -= offlens[i];
3571   }
3572   if (!sizesset) {
3573     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3574   }
3575 
3576   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3577 
3578   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3579 
3580   for (i=0; i<m; i++) {
3581     ourlens[i] += offlens[i];
3582   }
3583 
3584   if (!rank) {
3585     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3586 
3587     /* read in my part of the matrix numerical values  */
3588     nz   = procsnz[0];
3589     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3590 
3591     /* insert into matrix */
3592     jj      = rstart;
3593     smycols = mycols;
3594     svals   = vals;
3595     for (i=0; i<m; i++) {
3596       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3597       smycols += ourlens[i];
3598       svals   += ourlens[i];
3599       jj++;
3600     }
3601 
3602     /* read in other processors and ship out */
3603     for (i=1; i<size; i++) {
3604       nz   = procsnz[i];
3605       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3606       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3607     }
3608     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3609   } else {
3610     /* receive numeric values */
3611     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3612 
3613     /* receive message of values*/
3614     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3615 
3616     /* insert into matrix */
3617     jj      = rstart;
3618     smycols = mycols;
3619     svals   = vals;
3620     for (i=0; i<m; i++) {
3621       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3622       smycols += ourlens[i];
3623       svals   += ourlens[i];
3624       jj++;
3625     }
3626   }
3627   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3628   ierr = PetscFree(vals);CHKERRQ(ierr);
3629   ierr = PetscFree(mycols);CHKERRQ(ierr);
3630   ierr = PetscFree(rowners);CHKERRQ(ierr);
3631   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 #undef __FUNCT__
3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3639 {
3640   PetscErrorCode ierr;
3641   IS             iscol_local;
3642   PetscInt       csize;
3643 
3644   PetscFunctionBegin;
3645   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3646   if (call == MAT_REUSE_MATRIX) {
3647     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3648     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3649   } else {
3650     PetscInt cbs;
3651     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3652     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3653     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3654   }
3655   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3656   if (call == MAT_INITIAL_MATRIX) {
3657     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3658     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3659   }
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3666 /*
3667     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3668   on each process, and then the final result obtained by concatenating the local
3669   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3670 
3671   Note: This requires a sequential iscol with all indices.
3672 */
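
/*
   The caller must supply that sequential iscol itself.  MatGetSubMatrix_MPIAIJ() above builds it
   by gathering the parallel column IS onto every process, roughly (sketch only):

       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
*/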
3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3674 {
3675   PetscErrorCode ierr;
3676   PetscMPIInt    rank,size;
3677   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3678   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3679   PetscBool      allcolumns, colflag;
3680   Mat            M,Mreuse;
3681   MatScalar      *vwork,*aa;
3682   MPI_Comm       comm;
3683   Mat_SeqAIJ     *aij;
3684 
3685   PetscFunctionBegin;
3686   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3687   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3688   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3689 
3690   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3691   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3692   if (colflag && ncol == mat->cmap->N) {
3693     allcolumns = PETSC_TRUE;
3694   } else {
3695     allcolumns = PETSC_FALSE;
3696   }
3697   if (call ==  MAT_REUSE_MATRIX) {
3698     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3699     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3700     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3701   } else {
3702     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3703   }
3704 
3705   /*
3706       m - number of local rows
3707       n - number of columns (same on all processors)
3708       rstart - first row in new global matrix generated
3709   */
3710   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3711   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3712   if (call == MAT_INITIAL_MATRIX) {
3713     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3714     ii  = aij->i;
3715     jj  = aij->j;
3716 
3717     /*
3718         Determine the number of non-zeros in the diagonal and off-diagonal
3719         portions of the matrix in order to do correct preallocation
3720     */
3721 
3722     /* first get start and end of "diagonal" columns */
3723     if (csize == PETSC_DECIDE) {
3724       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3725       if (mglobal == n) { /* square matrix */
3726         nlocal = m;
3727       } else {
3728         nlocal = n/size + ((n % size) > rank);
3729       }
3730     } else {
3731       nlocal = csize;
3732     }
3733     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3734     rstart = rend - nlocal;
3735     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3736 
3737     /* next, compute all the lengths */
3738     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3739     olens = dlens + m;
3740     for (i=0; i<m; i++) {
3741       jend = ii[i+1] - ii[i];
3742       olen = 0;
3743       dlen = 0;
3744       for (j=0; j<jend; j++) {
3745         if (*jj < rstart || *jj >= rend) olen++;
3746         else dlen++;
3747         jj++;
3748       }
3749       olens[i] = olen;
3750       dlens[i] = dlen;
3751     }
3752     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3753     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3754     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3755     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3757     ierr = PetscFree(dlens);CHKERRQ(ierr);
3758   } else {
3759     PetscInt ml,nl;
3760 
3761     M    = *newmat;
3762     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3763     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3764     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3765     /*
3766          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3767        rather than the slower MatSetValues().
3768     */
3769     M->was_assembled = PETSC_TRUE;
3770     M->assembled     = PETSC_FALSE;
3771   }
3772   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3773   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3774   ii   = aij->i;
3775   jj   = aij->j;
3776   aa   = aij->a;
3777   for (i=0; i<m; i++) {
3778     row   = rstart + i;
3779     nz    = ii[i+1] - ii[i];
3780     cwork = jj;     jj += nz;
3781     vwork = aa;     aa += nz;
3782     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3783   }
3784 
3785   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3786   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   *newmat = M;
3788 
3789   /* save submatrix used in processor for next request */
3790   if (call ==  MAT_INITIAL_MATRIX) {
3791     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3792     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3793   }
3794   PetscFunctionReturn(0);
3795 }
3796 
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3799 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3800 {
3801   PetscInt       m,cstart, cend,j,nnz,i,d;
3802   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3803   const PetscInt *JJ;
3804   PetscScalar    *values;
3805   PetscErrorCode ierr;
3806 
3807   PetscFunctionBegin;
3808   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3809 
3810   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3811   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3812   m      = B->rmap->n;
3813   cstart = B->cmap->rstart;
3814   cend   = B->cmap->rend;
3815   rstart = B->rmap->rstart;
3816 
3817   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3818 
3819 #if defined(PETSC_USE_DEBUG)
3820   for (i=0; i<m; i++) {
3821     nnz = Ii[i+1]- Ii[i];
3822     JJ  = J + Ii[i];
3823     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3824     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3825     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3826   }
3827 #endif
3828 
3829   for (i=0; i<m; i++) {
3830     nnz     = Ii[i+1]- Ii[i];
3831     JJ      = J + Ii[i];
3832     nnz_max = PetscMax(nnz_max,nnz);
3833     d       = 0;
3834     for (j=0; j<nnz; j++) {
3835       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3836     }
3837     d_nnz[i] = d;
3838     o_nnz[i] = nnz - d;
3839   }
3840   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3841   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3842 
3843   if (v) values = (PetscScalar*)v;
3844   else {
3845     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3846     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3847   }
3848 
3849   for (i=0; i<m; i++) {
3850     ii   = i + rstart;
3851     nnz  = Ii[i+1]- Ii[i];
3852     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3853   }
3854   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3855   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3856 
3857   if (!v) {
3858     ierr = PetscFree(values);CHKERRQ(ierr);
3859   }
3860   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3866 /*@
3867    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3868    (the default parallel PETSc format).
3869 
3870    Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +  B - the matrix
3874 .  i - the indices into j for the start of each local row (starts with zero)
3875 .  j - the column indices for each local row (starts with zero)
3876 -  v - optional values in the matrix
3877 
3878    Level: developer
3879 
3880    Notes:
3881        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3882      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3883      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3884 
3885        The i and j indices are 0 based, and the i array holds offsets into the local j array.
3886 
3887        The format used for the sparse matrix input is equivalent to a
3888     row-major ordering, i.e. for the following matrix, the expected input data is
3889     as shown:
3890 
3891         1 0 0
3892         2 0 3     P0
3893        -------
3894         4 5 6     P1
3895 
3896      Process0 [P0]: rows_owned=[0,1]
3897         i =  {0,1,3}  [size = nrow+1  = 2+1]
3898         j =  {0,0,2}  [size = nz = 3]
3899         v =  {1,2,3}  [size = nz = 3]
3900 
3901      Process1 [P1]: rows_owned=[2]
3902         i =  {0,3}    [size = nrow+1  = 1+1]
3903         j =  {0,1,2}  [size = nz = 3]
3904         v =  {4,5,6}  [size = nz = 3]
3905 
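     A minimal sketch of the corresponding calls on two processes, using the arrays
   listed above (array names are illustrative and error checking is omitted for brevity):

.vb
     Mat         B;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};  // data on rank 0
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};  // data on rank 1
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);   // 2 local rows on rank 0, 1 on rank 1
     MatSetType(B,MATMPIAIJ);
     if (!rank) MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);
     else       MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);
.ve

     After this call the matrix is preallocated, filled with the given values, and assembled.
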
3906 .keywords: matrix, aij, compressed row, sparse, parallel
3907 
3908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3909           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3910 @*/
3911 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3912 {
3913   PetscErrorCode ierr;
3914 
3915   PetscFunctionBegin;
3916   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3922 /*@C
3923    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3924    (the default parallel PETSc format).  For good matrix assembly performance
3925    the user should preallocate the matrix storage by setting the parameters
3926    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3927    performance can be increased by more than a factor of 50.
3928 
3929    Collective on MPI_Comm
3930 
3931    Input Parameters:
3932 +  A - the matrix
3933 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3934            (same value is used for all local rows)
3935 .  d_nnz - array containing the number of nonzeros in the various rows of the
3936            DIAGONAL portion of the local submatrix (possibly different for each row)
3937            or NULL, if d_nz is used to specify the nonzero structure.
3938            The size of this array is equal to the number of local rows, i.e 'm'.
3939            For matrices that will be factored, you must leave room for (and set)
3940            the diagonal entry even if it is zero.
3941 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3942            submatrix (same value is used for all local rows).
3943 -  o_nnz - array containing the number of nonzeros in the various rows of the
3944            OFF-DIAGONAL portion of the local submatrix (possibly different for
3945            each row) or NULL, if o_nz is used to specify the nonzero
3946            structure. The size of this array is equal to the number
3947            of local rows, i.e 'm'.
3948 
3949    If the *_nnz parameter is given then the *_nz parameter is ignored
3950 
3951    The AIJ format (also called the Yale sparse matrix format or
3952    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3953    storage.  The stored row and column indices begin with zero.
3954    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3955 
3956    The parallel matrix is partitioned such that the first m0 rows belong to
3957    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3958    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3959 
3960    The DIAGONAL portion of the local submatrix of a processor can be defined
3961    as the submatrix which is obtained by extracting the part corresponding to
3962    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3963    first row that belongs to the processor, r2 is the last row belonging to
3964    this processor, and c1-c2 is the range of indices of the local part of a
3965    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3966    common case of a square matrix, the row and column ranges are the same and
3967    the DIAGONAL part is also square. The remaining portion of the local
3968    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3969 
3970    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3971 
3972    You can call MatGetInfo() to get information on how effective the preallocation was;
3973    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3974    You can also run with the option -info and look for messages with the string
3975    malloc in them to see if additional memory allocation was needed.
3976 
3977    Example usage:
3978 
3979    Consider the following 8x8 matrix with 34 non-zero values, that is
3980    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3981    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3982    as follows:
3983 
3984 .vb
3985             1  2  0  |  0  3  0  |  0  4
3986     Proc0   0  5  6  |  7  0  0  |  8  0
3987             9  0 10  | 11  0  0  | 12  0
3988     -------------------------------------
3989            13  0 14  | 15 16 17  |  0  0
3990     Proc1   0 18  0  | 19 20 21  |  0  0
3991             0  0  0  | 22 23  0  | 24  0
3992     -------------------------------------
3993     Proc2  25 26 27  |  0  0 28  | 29  0
3994            30  0  0  | 31 32 33  |  0 34
3995 .ve
3996 
3997    This can be represented as a collection of submatrices as:
3998 
3999 .vb
4000       A B C
4001       D E F
4002       G H I
4003 .ve
4004 
4005    Where the submatrices A,B,C are owned by proc0, D,E,F are
4006    owned by proc1, G,H,I are owned by proc2.
4007 
4008    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4009    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4010    The 'M','N' parameters are 8,8, and have the same values on all procs.
4011 
4012    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4013    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4014    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4015    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4016    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4017    matrix and [DF] as another SeqAIJ matrix.
4018 
4019    When d_nz, o_nz parameters are specified, d_nz storage elements are
4020    allocated for every row of the local diagonal submatrix, and o_nz
4021    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4022    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4023    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4024    In this case, the values of d_nz,o_nz are:
4025 .vb
4026      proc0 : dnz = 2, o_nz = 2
4027      proc1 : dnz = 3, o_nz = 2
4028      proc2 : dnz = 1, o_nz = 4
4029 .ve
4030    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4031    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4032    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4033    34 values.
4034 
4035    When d_nnz, o_nnz parameters are specified, the storage is specified
4036    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4037    In the above case the values for d_nnz,o_nnz are:
4038 .vb
4039      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4040      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4041      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4042 .ve
4043    Here the space allocated is the sum of all the above values, i.e., 34, and
4044    hence the preallocation is exact.
4045 
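   For instance, a minimal sketch of the setup on proc0 of the example above, using the
   d_nnz/o_nnz values just listed (error checking omitted; the other processes make the
   analogous calls with their own local sizes and arrays):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   // per-row counts for proc0

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          // m=n=3 locally, M=N=8 globally
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     // ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
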
4046    Level: intermediate
4047 
4048 .keywords: matrix, aij, compressed row, sparse, parallel
4049 
4050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4051           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4052 @*/
4053 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4054 {
4055   PetscErrorCode ierr;
4056 
4057   PetscFunctionBegin;
4058   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4059   PetscValidType(B,1);
4060   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4061   PetscFunctionReturn(0);
4062 }
4063 
4064 #undef __FUNCT__
4065 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4066 /*@
4067      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4068          CSR format the local rows.
4069 
4070    Collective on MPI_Comm
4071 
4072    Input Parameters:
4073 +  comm - MPI communicator
4074 .  m - number of local rows (Cannot be PETSC_DECIDE)
4075 .  n - This value should be the same as the local size used in creating the
4076        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4077        calculated if N is given) For square matrices n is almost always m.
4078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4080 .   i - row indices
4081 .   j - column indices
4082 -   a - matrix values
4083 
4084    Output Parameter:
4085 .   mat - the matrix
4086 
4087    Level: intermediate
4088 
4089    Notes:
4090        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4091      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4092      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4093 
4094        The i and j indices are 0 based, and the i array holds offsets into the local j array.
4095 
4096        The format used for the sparse matrix input is equivalent to a
4097     row-major ordering, i.e. for the following matrix, the expected input data is
4098     as shown:
4099 
4100         1 0 0
4101         2 0 3     P0
4102        -------
4103         4 5 6     P1
4104 
4105      Process0 [P0]: rows_owned=[0,1]
4106         i =  {0,1,3}  [size = nrow+1  = 2+1]
4107         j =  {0,0,2}  [size = nz = 3]
4108         v =  {1,2,3}  [size = nz = 3]
4109 
4110      Process1 [P1]: rows_owned=[2]
4111         i =  {0,3}    [size = nrow+1  = 1+1]
4112         j =  {0,1,2}  [size = nz = 3]
4113         v =  {4,5,6}  [size = nz = 3]
4114 
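     A minimal sketch of the corresponding call on two processes, using the arrays listed
   above (array names are illustrative and error checking is omitted):

.vb
     Mat         mat;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};  // rank 0
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};  // rank 1
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     if (!rank) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&mat);
     else       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&mat);
.ve
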
4115 .keywords: matrix, aij, compressed row, sparse, parallel
4116 
4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4118           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4119 @*/
4120 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4121 {
4122   PetscErrorCode ierr;
4123 
4124   PetscFunctionBegin;
4125   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4126   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4127   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4128   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4129   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4130   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
4134 
4135 #undef __FUNCT__
4136 #define __FUNCT__ "MatCreateAIJ"
4137 /*@C
4138    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4139    (the default parallel PETSc format).  For good matrix assembly performance
4140    the user should preallocate the matrix storage by setting the parameters
4141    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4142    performance can be increased by more than a factor of 50.
4143 
4144    Collective on MPI_Comm
4145 
4146    Input Parameters:
4147 +  comm - MPI communicator
4148 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4149            This value should be the same as the local size used in creating the
4150            y vector for the matrix-vector product y = Ax.
4151 .  n - This value should be the same as the local size used in creating the
4152        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4153        calculated if N is given) For square matrices n is almost always m.
4154 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4155 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4156 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4157            (same value is used for all local rows)
4158 .  d_nnz - array containing the number of nonzeros in the various rows of the
4159            DIAGONAL portion of the local submatrix (possibly different for each row)
4160            or NULL, if d_nz is used to specify the nonzero structure.
4161            The size of this array is equal to the number of local rows, i.e 'm'.
4162 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4163            submatrix (same value is used for all local rows).
4164 -  o_nnz - array containing the number of nonzeros in the various rows of the
4165            OFF-DIAGONAL portion of the local submatrix (possibly different for
4166            each row) or NULL, if o_nz is used to specify the nonzero
4167            structure. The size of this array is equal to the number
4168            of local rows, i.e 'm'.
4169 
4170    Output Parameter:
4171 .  A - the matrix
4172 
4173    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4174    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4175    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4176 
4177    Notes:
4178    If the *_nnz parameter is given then the *_nz parameter is ignored
4179 
4180    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4181    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4182    storage requirements for this matrix.
4183 
4184    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4185    processor then it must be used on all processors that share the object for
4186    that argument.
4187 
4188    The user MUST specify either the local or global matrix dimensions
4189    (possibly both).
4190 
4191    The parallel matrix is partitioned across processors such that the
4192    first m0 rows belong to process 0, the next m1 rows belong to
4193    process 1, the next m2 rows belong to process 2, etc., where
4194    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4195    values corresponding to an [m x N] submatrix.
4196 
4197    The columns are logically partitioned with the n0 columns belonging
4198    to the 0th partition, the next n1 columns belonging to the next
4199    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4200 
4201    The DIAGONAL portion of the local submatrix on any given processor
4202    is the submatrix corresponding to the rows and columns m,n
4203    owned by the given processor, i.e., the diagonal matrix on
4204    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4205    etc. The remaining portion of the local submatrix [m x (N-n)]
4206    constitutes the OFF-DIAGONAL portion. The example below better
4207    illustrates this concept.
4208 
4209    For a square global matrix we define each processor's diagonal portion
4210    to be its local rows and the corresponding columns (a square submatrix);
4211    each processor's off-diagonal portion encompasses the remainder of the
4212    local matrix (a rectangular submatrix).
4213 
4214    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4215 
4216    When calling this routine with a single process communicator, a matrix of
4217    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4218    type of communicator, use the construction mechanism:
4219      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4220 
4221    By default, this format uses inodes (identical nodes) when possible.
4222    We search for consecutive rows with the same nonzero structure, thereby
4223    reusing matrix information to achieve increased efficiency.
4224 
4225    Options Database Keys:
4226 +  -mat_no_inode  - Do not use inodes
4227 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4228 -  -mat_aij_oneindex - Internally use indexing starting at 1
4229         rather than 0.  Note that when calling MatSetValues(),
4230         the user still MUST index entries starting at 0!
4231 
4232 
4233    Example usage:
4234 
4235    Consider the following 8x8 matrix with 34 non-zero values, that is
4236    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4237    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4238    as follows:
4239 
4240 .vb
4241             1  2  0  |  0  3  0  |  0  4
4242     Proc0   0  5  6  |  7  0  0  |  8  0
4243             9  0 10  | 11  0  0  | 12  0
4244     -------------------------------------
4245            13  0 14  | 15 16 17  |  0  0
4246     Proc1   0 18  0  | 19 20 21  |  0  0
4247             0  0  0  | 22 23  0  | 24  0
4248     -------------------------------------
4249     Proc2  25 26 27  |  0  0 28  | 29  0
4250            30  0  0  | 31 32 33  |  0 34
4251 .ve
4252 
4253    This can be represented as a collection of submatrices as:
4254 
4255 .vb
4256       A B C
4257       D E F
4258       G H I
4259 .ve
4260 
4261    Where the submatrices A,B,C are owned by proc0, D,E,F are
4262    owned by proc1, G,H,I are owned by proc2.
4263 
4264    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4265    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4266    The 'M','N' parameters are 8,8, and have the same values on all procs.
4267 
4268    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4269    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4270    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4271    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4272    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4273    matrix and [DF] as another SeqAIJ matrix.
4274 
4275    When d_nz, o_nz parameters are specified, d_nz storage elements are
4276    allocated for every row of the local diagonal submatrix, and o_nz
4277    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4278    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4279    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4280    In this case, the values of d_nz,o_nz are:
4281 .vb
4282      proc0 : dnz = 2, o_nz = 2
4283      proc1 : dnz = 3, o_nz = 2
4284      proc2 : dnz = 1, o_nz = 4
4285 .ve
4286    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4287    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4288    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4289    34 values.
4290 
4291    When d_nnz, o_nnz parameters are specified, the storage is specified
4292    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4293    In the above case the values for d_nnz,o_nnz are:
4294 .vb
4295      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4296      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4297      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4298 .ve
4299    Here the space allocated is the sum of all the above values, i.e., 34, and
4300    hence the preallocation is exact.
4301 
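   A minimal sketch of the call on proc0 of the example above, using the per-row counts
   just listed (the other processes pass their own local sizes and arrays; error checking
   omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   // proc0 rows of the 8x8 example

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
.ve
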
4302    Level: intermediate
4303 
4304 .keywords: matrix, aij, compressed row, sparse, parallel
4305 
4306 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4307           MPIAIJ, MatCreateMPIAIJWithArrays()
4308 @*/
4309 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4310 {
4311   PetscErrorCode ierr;
4312   PetscMPIInt    size;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4316   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4317   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4318   if (size > 1) {
4319     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4320     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4321   } else {
4322     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4323     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4324   }
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4330 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4331 {
4332   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4333 
4334   PetscFunctionBegin;
4335   *Ad     = a->A;
4336   *Ao     = a->B;
4337   *colmap = a->garray;
4338   PetscFunctionReturn(0);
4339 }
4340 
4341 #undef __FUNCT__
4342 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4343 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4344 {
4345   PetscErrorCode ierr;
4346   PetscInt       i;
4347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4348 
4349   PetscFunctionBegin;
4350   if (coloring->ctype == IS_COLORING_GLOBAL) {
4351     ISColoringValue *allcolors,*colors;
4352     ISColoring      ocoloring;
4353 
4354     /* set coloring for diagonal portion */
4355     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4356 
4357     /* set coloring for off-diagonal portion */
4358     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4359     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4360     for (i=0; i<a->B->cmap->n; i++) {
4361       colors[i] = allcolors[a->garray[i]];
4362     }
4363     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4364     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4365     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4366     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4367   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4368     ISColoringValue *colors;
4369     PetscInt        *larray;
4370     ISColoring      ocoloring;
4371 
4372     /* set coloring for diagonal portion */
4373     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4374     for (i=0; i<a->A->cmap->n; i++) {
4375       larray[i] = i + A->cmap->rstart;
4376     }
4377     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4378     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4379     for (i=0; i<a->A->cmap->n; i++) {
4380       colors[i] = coloring->colors[larray[i]];
4381     }
4382     ierr = PetscFree(larray);CHKERRQ(ierr);
4383     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4384     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4385     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4386 
4387     /* set coloring for off-diagonal portion */
4388     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4389     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4390     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4391     for (i=0; i<a->B->cmap->n; i++) {
4392       colors[i] = coloring->colors[larray[i]];
4393     }
4394     ierr = PetscFree(larray);CHKERRQ(ierr);
4395     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4396     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4397     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4398   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4399   PetscFunctionReturn(0);
4400 }
4401 
4402 #undef __FUNCT__
4403 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4404 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4405 {
4406   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4407   PetscErrorCode ierr;
4408 
4409   PetscFunctionBegin;
4410   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4411   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4412   PetscFunctionReturn(0);
4413 }
4414 
4415 #undef __FUNCT__
4416 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4417 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4418 {
4419   PetscErrorCode ierr;
4420   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4421   PetscInt       *indx;
4422 
4423   PetscFunctionBegin;
4424   /* This routine will ONLY return MPIAIJ type matrix */
4425   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4426   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4427   if (n == PETSC_DECIDE) {
4428     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4429   }
4430   /* Check sum(n) = N */
4431   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4432   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4433 
4434   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4435   rstart -= m;
4436 
4437   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4438   for (i=0; i<m; i++) {
4439     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4440     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4441     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4442   }
4443 
4444   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4445   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4446   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4447   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4448   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4449   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4455 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4456 {
4457   PetscErrorCode ierr;
4458   PetscInt       m,N,i,rstart,nnz,Ii;
4459   PetscInt       *indx;
4460   PetscScalar    *values;
4461 
4462   PetscFunctionBegin;
4463   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4464   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4465   for (i=0; i<m; i++) {
4466     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4467     Ii   = i + rstart;
4468     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4469     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4470   }
4471   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 #undef __FUNCT__
4477 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4478 /*@
4479       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4480                  matrices from each processor
4481 
4482     Collective on MPI_Comm
4483 
4484    Input Parameters:
4485 +    comm - the communicator the parallel matrix will live on
4486 .    inmat - the input sequential matrix (one per process)
4487 .    n - number of local columns (or PETSC_DECIDE)
4488 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4489 
4490    Output Parameter:
4491 .    outmat - the parallel matrix generated
4492 
4493     Level: advanced
4494 
4495    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4496 
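   A minimal sketch of the intended use (each process contributes a 2 x 3 sequential
   matrix, so the result has 2*size rows; filling and assembling inmat, as well as error
   checking, are omitted):

.vb
     Mat seq,par;
     MatCreateSeqAIJ(PETSC_COMM_SELF,2,3,3,NULL,&seq);
     // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on seq ...
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
.ve
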
4497 @*/
4498 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4499 {
4500   PetscErrorCode ierr;
4501 
4502   PetscFunctionBegin;
4503   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4504   if (scall == MAT_INITIAL_MATRIX) {
4505     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4506   }
4507   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4508   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatFileSplit"
4514 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4515 {
4516   PetscErrorCode    ierr;
4517   PetscMPIInt       rank;
4518   PetscInt          m,N,i,rstart,nnz;
4519   size_t            len;
4520   const PetscInt    *indx;
4521   PetscViewer       out;
4522   char              *name;
4523   Mat               B;
4524   const PetscScalar *values;
4525 
4526   PetscFunctionBegin;
4527   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4528   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4529   /* Should this be the type of the diagonal block of A? */
4530   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4531   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4532   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4533   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4534   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4535   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4536   for (i=0; i<m; i++) {
4537     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4538     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4539     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4540   }
4541   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4542   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4543 
4544   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4545   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4546   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4547   sprintf(name,"%s.%d",outfile,rank);
4548   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4549   ierr = PetscFree(name);CHKERRQ(ierr);
4550   ierr = MatView(B,out);CHKERRQ(ierr);
4551   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4552   ierr = MatDestroy(&B);CHKERRQ(ierr);
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4557 #undef __FUNCT__
4558 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4559 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4560 {
4561   PetscErrorCode      ierr;
4562   Mat_Merge_SeqsToMPI *merge;
4563   PetscContainer      container;
4564 
4565   PetscFunctionBegin;
4566   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4567   if (container) {
4568     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4579     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4580     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4581     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4582     ierr = PetscFree(merge);CHKERRQ(ierr);
4583     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4584   }
4585   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #include <../src/mat/utils/freespace.h>
4590 #include <petscbt.h>
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4594 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4595 {
4596   PetscErrorCode      ierr;
4597   MPI_Comm            comm;
4598   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4599   PetscMPIInt         size,rank,taga,*len_s;
4600   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4601   PetscInt            proc,m;
4602   PetscInt            **buf_ri,**buf_rj;
4603   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4604   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4605   MPI_Request         *s_waits,*r_waits;
4606   MPI_Status          *status;
4607   MatScalar           *aa=a->a;
4608   MatScalar           **abuf_r,*ba_i;
4609   Mat_Merge_SeqsToMPI *merge;
4610   PetscContainer      container;
4611 
4612   PetscFunctionBegin;
4613   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4614   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4615 
4616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4617   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4618 
4619   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4620   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4621 
4622   bi     = merge->bi;
4623   bj     = merge->bj;
4624   buf_ri = merge->buf_ri;
4625   buf_rj = merge->buf_rj;
4626 
4627   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4628   owners = merge->rowmap->range;
4629   len_s  = merge->len_s;
4630 
4631   /* send and recv matrix values */
4632   /*-----------------------------*/
4633   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4634   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4635 
4636   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4637   for (proc=0,k=0; proc<size; proc++) {
4638     if (!len_s[proc]) continue;
4639     i    = owners[proc];
4640     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4641     k++;
4642   }
4643 
4644   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4645   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4646   ierr = PetscFree(status);CHKERRQ(ierr);
4647 
4648   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4650 
4651   /* insert mat values of mpimat */
4652   /*----------------------------*/
4653   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4654   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4655 
4656   for (k=0; k<merge->nrecv; k++) {
4657     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4658     nrows       = *(buf_ri_k[k]);
4659     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4660     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4661   }
4662 
4663   /* set values of ba */
4664   m = merge->rowmap->n;
4665   for (i=0; i<m; i++) {
4666     arow = owners[rank] + i;
4667     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4668     bnzi = bi[i+1] - bi[i];
4669     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4670 
4671     /* add local non-zero vals of this proc's seqmat into ba */
4672     anzi   = ai[arow+1] - ai[arow];
4673     aj     = a->j + ai[arow];
4674     aa     = a->a + ai[arow];
4675     nextaj = 0;
4676     for (j=0; nextaj<anzi; j++) {
4677       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4678         ba_i[j] += aa[nextaj++];
4679       }
4680     }
4681 
4682     /* add received vals into ba */
4683     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4684       /* i-th row */
4685       if (i == *nextrow[k]) {
4686         anzi   = *(nextai[k]+1) - *nextai[k];
4687         aj     = buf_rj[k] + *(nextai[k]);
4688         aa     = abuf_r[k] + *(nextai[k]);
4689         nextaj = 0;
4690         for (j=0; nextaj<anzi; j++) {
4691           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4692             ba_i[j] += aa[nextaj++];
4693           }
4694         }
4695         nextrow[k]++; nextai[k]++;
4696       }
4697     }
4698     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4699   }
4700   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4702 
4703   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4704   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4705   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4706   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4707   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4715 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4716 {
4717   PetscErrorCode      ierr;
4718   Mat                 B_mpi;
4719   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4720   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4721   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4722   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4723   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4724   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4725   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4726   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4727   MPI_Status          *status;
4728   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4729   PetscBT             lnkbt;
4730   Mat_Merge_SeqsToMPI *merge;
4731   PetscContainer      container;
4732 
4733   PetscFunctionBegin;
4734   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4735 
4736   /* make sure it is a PETSc comm */
4737   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4739   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4740 
4741   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4742   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4743 
4744   /* determine row ownership */
4745   /*---------------------------------------------------------*/
4746   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4747   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4748   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4749   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4750   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4751   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4752   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4753 
4754   m      = merge->rowmap->n;
4755   owners = merge->rowmap->range;
4756 
4757   /* determine the number of messages to send, their lengths */
4758   /*---------------------------------------------------------*/
4759   len_s = merge->len_s;
4760 
4761   len          = 0; /* length of buf_si[] */
4762   merge->nsend = 0;
4763   for (proc=0; proc<size; proc++) {
4764     len_si[proc] = 0;
4765     if (proc == rank) {
4766       len_s[proc] = 0;
4767     } else {
4768       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4769       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4770     }
4771     if (len_s[proc]) {
4772       merge->nsend++;
4773       nrows = 0;
4774       for (i=owners[proc]; i<owners[proc+1]; i++) {
4775         if (ai[i+1] > ai[i]) nrows++;
4776       }
4777       len_si[proc] = 2*(nrows+1);
4778       len         += len_si[proc];
4779     }
4780   }
4781 
4782   /* determine the number and length of messages to receive for ij-structure */
4783   /*-------------------------------------------------------------------------*/
4784   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4785   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4786 
4787   /* post the Irecv of j-structure */
4788   /*-------------------------------*/
4789   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4790   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4791 
4792   /* post the Isend of j-structure */
4793   /*--------------------------------*/
4794   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4795 
4796   for (proc=0, k=0; proc<size; proc++) {
4797     if (!len_s[proc]) continue;
4798     i    = owners[proc];
4799     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4800     k++;
4801   }
4802 
4803   /* receives and sends of j-structure are complete */
4804   /*------------------------------------------------*/
4805   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4806   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4807 
4808   /* send and recv i-structure */
4809   /*---------------------------*/
4810   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4811   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4812 
4813   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4814   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4815   for (proc=0,k=0; proc<size; proc++) {
4816     if (!len_s[proc]) continue;
4817     /* form outgoing message for i-structure:
4818          buf_si[0]:                 nrows to be sent
4819                [1:nrows]:           row index (global)
4820                [nrows+1:2*nrows+1]: i-structure index
4821     */
4822     /*-------------------------------------------*/
4823     nrows       = len_si[proc]/2 - 1;
4824     buf_si_i    = buf_si + nrows+1;
4825     buf_si[0]   = nrows;
4826     buf_si_i[0] = 0;
4827     nrows       = 0;
4828     for (i=owners[proc]; i<owners[proc+1]; i++) {
4829       anzi = ai[i+1] - ai[i];
4830       if (anzi) {
4831         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4832         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4833         nrows++;
4834       }
4835     }
4836     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4837     k++;
4838     buf_si += len_si[proc];
4839   }
4840 
4841   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4842   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4843 
4844   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4845   for (i=0; i<merge->nrecv; i++) {
4846     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4847   }
4848 
4849   ierr = PetscFree(len_si);CHKERRQ(ierr);
4850   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4851   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4852   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4853   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4854   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4855   ierr = PetscFree(status);CHKERRQ(ierr);
4856 
4857   /* compute a local seq matrix in each processor */
4858   /*----------------------------------------------*/
4859   /* allocate bi array and free space for accumulating nonzero column info */
4860   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4861   bi[0] = 0;
4862 
4863   /* create and initialize a linked list */
4864   nlnk = N+1;
4865   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866 
4867   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4868   len  = ai[owners[rank+1]] - ai[owners[rank]];
4869   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4870 
4871   current_space = free_space;
4872 
4873   /* determine symbolic info for each local row */
4874   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4875 
4876   for (k=0; k<merge->nrecv; k++) {
4877     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4878     nrows       = *buf_ri_k[k];
4879     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4880     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4881   }
4882 
4883   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4884   len  = 0;
4885   for (i=0; i<m; i++) {
4886     bnzi = 0;
4887     /* add local non-zero cols of this proc's seqmat into lnk */
4888     arow  = owners[rank] + i;
4889     anzi  = ai[arow+1] - ai[arow];
4890     aj    = a->j + ai[arow];
4891     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4892     bnzi += nlnk;
4893     /* add received col data into lnk */
4894     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4895       if (i == *nextrow[k]) { /* i-th row */
4896         anzi  = *(nextai[k]+1) - *nextai[k];
4897         aj    = buf_rj[k] + *nextai[k];
4898         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4899         bnzi += nlnk;
4900         nextrow[k]++; nextai[k]++;
4901       }
4902     }
4903     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4904 
4905     /* if free space is not available, make more free space */
4906     if (current_space->local_remaining<bnzi) {
4907       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4908       nspacedouble++;
4909     }
4910     /* copy data into free space, then initialize lnk */
4911     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4912     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4913 
4914     current_space->array           += bnzi;
4915     current_space->local_used      += bnzi;
4916     current_space->local_remaining -= bnzi;
4917 
4918     bi[i+1] = bi[i] + bnzi;
4919   }
4920 
4921   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4922 
4923   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4924   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4925   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4926 
4927   /* create symbolic parallel matrix B_mpi */
4928   /*---------------------------------------*/
4929   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4930   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4931   if (n==PETSC_DECIDE) {
4932     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4933   } else {
4934     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4935   }
4936   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4937   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4938   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4939   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4940   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4941 
4942   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4943   B_mpi->assembled    = PETSC_FALSE;
4944   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4945   merge->bi           = bi;
4946   merge->bj           = bj;
4947   merge->buf_ri       = buf_ri;
4948   merge->buf_rj       = buf_rj;
4949   merge->coi          = NULL;
4950   merge->coj          = NULL;
4951   merge->owners_co    = NULL;
4952 
4953   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4954 
4955   /* attach the supporting struct to B_mpi for reuse */
4956   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4957   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4958   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4959   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4960   *mpimat = B_mpi;
4961 
4962   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4963   PetscFunctionReturn(0);
4964 }
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4968 /*@C
4969       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4970                  matrices from each processor
4971 
4972     Collective on MPI_Comm
4973 
4974    Input Parameters:
4975 +    comm - the communicator the parallel matrix will live on
4976 .    seqmat - the input sequential matrix (one per process)
4977 .    m - number of local rows (or PETSC_DECIDE)
4978 .    n - number of local columns (or PETSC_DECIDE)
4979 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4980 
4981    Output Parameter:
4982 .    mpimat - the parallel matrix generated
4983 
4984     Level: advanced
4985 
4986    Notes:
4987      The dimensions of the sequential matrix in each processor MUST be the same.
4988      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4989      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
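
     A minimal sketch of the intended use (each process first assembles its contribution
   into an M x N sequential matrix seqmat; error checking omitted):

.vb
     Mat seqmat,mpimat;
     // ... create seqmat as an M x N MATSEQAIJ and assemble this process's contribution ...
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     // ... later, after seqmat's values change but not its nonzero pattern ...
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve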
4990 @*/
4991 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4992 {
4993   PetscErrorCode ierr;
4994   PetscMPIInt    size;
4995 
4996   PetscFunctionBegin;
4997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4998   if (size == 1) {
4999     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000     if (scall == MAT_INITIAL_MATRIX) {
5001       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5002     } else {
5003       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5004     }
5005     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5006     PetscFunctionReturn(0);
5007   }
5008   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5011   }
5012   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5013   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5019 /*@
5020      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5021           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5022           with MatGetSize().
5023 
5024     Not Collective
5025 
5026    Input Parameters:
5027 +    A - the matrix
5028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 
5030    Output Parameter:
5031 .    A_loc - the local sequential matrix generated
5032 
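    A minimal sketch (error checking omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   // build the mlocal x N sequential matrix
     // ... use A_loc ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     // refresh the values after A changes
     MatDestroy(&A_loc);
.ve
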
5033     Level: developer
5034 
5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5036 
5037 @*/
5038 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5039 {
5040   PetscErrorCode ierr;
5041   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5042   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5043   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5044   MatScalar      *aa=a->a,*ba=b->a,*cam;
5045   PetscScalar    *ca;
5046   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5047   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5048   PetscBool      match;
5049 
5050   PetscFunctionBegin;
5051   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5052   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5053   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5054   if (scall == MAT_INITIAL_MATRIX) {
5055     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5056     ci[0] = 0;
5057     for (i=0; i<am; i++) {
5058       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5059     }
5060     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5061     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5062     k    = 0;
5063     for (i=0; i<am; i++) {
5064       ncols_o = bi[i+1] - bi[i];
5065       ncols_d = ai[i+1] - ai[i];
5066       /* off-diagonal portion of A */
5067       for (jo=0; jo<ncols_o; jo++) {
5068         col = cmap[*bj];
5069         if (col >= cstart) break;
5070         cj[k]   = col; bj++;
5071         ca[k++] = *ba++;
5072       }
5073       /* diagonal portion of A */
5074       for (j=0; j<ncols_d; j++) {
5075         cj[k]   = cstart + *aj++;
5076         ca[k++] = *aa++;
5077       }
5078       /* off-diagonal portion of A */
5079       for (j=jo; j<ncols_o; j++) {
5080         cj[k]   = cmap[*bj++];
5081         ca[k++] = *ba++;
5082       }
5083     }
5084     /* put together the new matrix */
5085     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5086     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5087     /* Since these are PETSc arrays, change flags to free them as necessary. */
5088     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5089     mat->free_a  = PETSC_TRUE;
5090     mat->free_ij = PETSC_TRUE;
5091     mat->nonew   = 0;
5092   } else if (scall == MAT_REUSE_MATRIX) {
5093     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5094     ci = mat->i; cj = mat->j; cam = mat->a;
5095     for (i=0; i<am; i++) {
5096       /* off-diagonal portion of A */
5097       ncols_o = bi[i+1] - bi[i];
5098       for (jo=0; jo<ncols_o; jo++) {
5099         col = cmap[*bj];
5100         if (col >= cstart) break;
5101         *cam++ = *ba++; bj++;
5102       }
5103       /* diagonal portion of A */
5104       ncols_d = ai[i+1] - ai[i];
5105       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5106       /* off-diagonal portion of A */
5107       for (j=jo; j<ncols_o; j++) {
5108         *cam++ = *ba++; bj++;
5109       }
5110     }
5111   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5112   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5113   PetscFunctionReturn(0);
5114 }
5115 
5116 #undef __FUNCT__
5117 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5118 /*@C
5119      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5120 
5121     Not Collective
5122 
5123    Input Parameters:
5124 +    A - the matrix
5125 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5126 -    row, col - index sets of rows and columns to extract (or NULL)
5127 
5128    Output Parameter:
5129 .    A_loc - the local sequential matrix generated
5130 
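    Example usage (a minimal sketch; passing NULL for row and col takes all local rows and all nonzero columns, error checking abbreviated):
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
   ... use A_loc ...
   MatDestroy(&A_loc);
.ve
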
5131     Level: developer
5132 
5133 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5134 
5135 @*/
5136 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5137 {
5138   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5139   PetscErrorCode ierr;
5140   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5141   IS             isrowa,iscola;
5142   Mat            *aloc;
5143   PetscBool      match;
5144 
5145   PetscFunctionBegin;
5146   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5147   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5148   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5149   if (!row) {
5150     start = A->rmap->rstart; end = A->rmap->rend;
5151     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5152   } else {
5153     isrowa = *row;
5154   }
5155   if (!col) {
5156     start = A->cmap->rstart;
5157     cmap  = a->garray;
5158     nzA   = a->A->cmap->n;
5159     nzB   = a->B->cmap->n;
5160     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5161     ncols = 0;
5162     for (i=0; i<nzB; i++) {
5163       if (cmap[i] < start) idx[ncols++] = cmap[i];
5164       else break;
5165     }
5166     imark = i;
5167     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5168     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5169     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5170   } else {
5171     iscola = *col;
5172   }
5173   if (scall != MAT_INITIAL_MATRIX) {
5174     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5175     aloc[0] = *A_loc;
5176   }
5177   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5178   *A_loc = aloc[0];
5179   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5180   if (!row) {
5181     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5182   }
5183   if (!col) {
5184     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5185   }
5186   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5187   PetscFunctionReturn(0);
5188 }
5189 
5190 #undef __FUNCT__
5191 #define __FUNCT__ "MatGetBrowsOfAcols"
5192 /*@C
5193     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5194 
5195     Collective on Mat
5196 
5197    Input Parameters:
5198 +    A,B - the matrices in mpiaij format
5199 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5200 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5201 
5202    Output Parameters:
5203 +    rowb, colb - index sets of rows and columns of B to extract
5204 -    B_seq - the sequential matrix generated
5205 
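    Example usage (a minimal sketch; A and B are assembled MPIAIJ matrices with compatible layouts, error checking abbreviated):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
   ... later, after the values of B change but its nonzero pattern does not ...
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
   ISDestroy(&rowb);
   ISDestroy(&colb);
   MatDestroy(&B_seq);
.ve
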
5206     Level: developer
5207 
5208 @*/
5209 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5210 {
5211   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5212   PetscErrorCode ierr;
5213   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5214   IS             isrowb,iscolb;
5215   Mat            *bseq=NULL;
5216 
5217   PetscFunctionBegin;
5218   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5219     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5220   }
5221   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5222 
5223   if (scall == MAT_INITIAL_MATRIX) {
5224     start = A->cmap->rstart;
5225     cmap  = a->garray;
5226     nzA   = a->A->cmap->n;
5227     nzB   = a->B->cmap->n;
5228     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5229     ncols = 0;
5230     for (i=0; i<nzB; i++) {  /* row < local row index */
5231       if (cmap[i] < start) idx[ncols++] = cmap[i];
5232       else break;
5233     }
5234     imark = i;
5235     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5236     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5237     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5238     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5239   } else {
5240     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5241     isrowb  = *rowb; iscolb = *colb;
5242     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5243     bseq[0] = *B_seq;
5244   }
5245   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5246   *B_seq = bseq[0];
5247   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5248   if (!rowb) {
5249     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5250   } else {
5251     *rowb = isrowb;
5252   }
5253   if (!colb) {
5254     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5255   } else {
5256     *colb = iscolb;
5257   }
5258   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5259   PetscFunctionReturn(0);
5260 }
5261 
5262 #undef __FUNCT__
5263 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5264 /*
5265     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5266     of the OFF-DIAGONAL portion of the local part of A
5267 
5268     Collective on Mat
5269 
5270    Input Parameters:
5271 +    A,B - the matrices in mpiaij format
5272 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5273 
5274    Output Parameters:
5275 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5276 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5277 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5278 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5279 
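    Example usage (a sketch of the typical call pattern from the parallel matrix product routines; error checking abbreviated,
    and the cleanup shown assumes the caller keeps and eventually frees the returned buffers):

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa      = NULL;
       Mat       B_oth;
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       ... later, after the values of B change but its nonzero pattern does not ...
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       PetscFree2(startsj_s,startsj_r);
       PetscFree(bufa);
       MatDestroy(&B_oth);
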
5280     Level: developer
5281 
5282 */
5283 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5284 {
5285   VecScatter_MPI_General *gen_to,*gen_from;
5286   PetscErrorCode         ierr;
5287   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5288   Mat_SeqAIJ             *b_oth;
5289   VecScatter             ctx =a->Mvctx;
5290   MPI_Comm               comm;
5291   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5292   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5293   PetscScalar            *rvalues,*svalues;
5294   MatScalar              *b_otha,*bufa,*bufA;
5295   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5296   MPI_Request            *rwaits = NULL,*swaits = NULL;
5297   MPI_Status             *sstatus,rstatus;
5298   PetscMPIInt            jj;
5299   PetscInt               *cols,sbs,rbs;
5300   PetscScalar            *vals;
5301 
5302   PetscFunctionBegin;
5303   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5304   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5305     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5306   }
5307   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5308   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5309 
5310   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5311   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5312   rvalues  = gen_from->values; /* will hold the lengths of the rows to be received */
5313   svalues  = gen_to->values;   /* will hold the lengths of the rows to be sent */
5314   nrecvs   = gen_from->n;
5315   nsends   = gen_to->n;
5316 
5317   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5318   srow    = gen_to->indices;    /* local row index to be sent */
5319   sstarts = gen_to->starts;
5320   sprocs  = gen_to->procs;
5321   sstatus = gen_to->sstatus;
5322   sbs     = gen_to->bs;
5323   rstarts = gen_from->starts;
5324   rprocs  = gen_from->procs;
5325   rbs     = gen_from->bs;
5326 
5327   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5328   if (scall == MAT_INITIAL_MATRIX) {
5329     /* i-array */
5330     /*---------*/
5331     /*  post receives */
5332     for (i=0; i<nrecvs; i++) {
5333       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5334       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5335       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5336     }
5337 
5338     /* pack the outgoing message */
5339     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5340 
5341     sstartsj[0] = 0;
5342     rstartsj[0] = 0;
5343     len         = 0; /* total length of j or a array to be sent */
5344     k           = 0;
5345     for (i=0; i<nsends; i++) {
5346       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5347       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5348       for (j=0; j<nrows; j++) {
5349         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5350         for (l=0; l<sbs; l++) {
5351           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5352 
5353           rowlen[j*sbs+l] = ncols;
5354 
5355           len += ncols;
5356           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5357         }
5358         k++;
5359       }
5360       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5361 
5362       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5363     }
5364     /* recvs and sends of i-array are completed */
5365     i = nrecvs;
5366     while (i--) {
5367       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5368     }
5369     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5370 
5371     /* allocate buffers for sending j and a arrays */
5372     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5373     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5374 
5375     /* create i-array of B_oth */
5376     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5377 
5378     b_othi[0] = 0;
5379     len       = 0; /* total length of j or a array to be received */
5380     k         = 0;
5381     for (i=0; i<nrecvs; i++) {
5382       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5383       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5384       for (j=0; j<nrows; j++) {
5385         b_othi[k+1] = b_othi[k] + rowlen[j];
5386         len        += rowlen[j]; k++;
5387       }
5388       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5389     }
5390 
5391     /* allocate space for j and a arrays of B_oth */
5392     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5393     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5394 
5395     /* j-array */
5396     /*---------*/
5397     /*  post receives of j-array */
5398     for (i=0; i<nrecvs; i++) {
5399       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5400       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* pack the outgoing message j-array */
5404     k = 0;
5405     for (i=0; i<nsends; i++) {
5406       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5407       bufJ  = bufj+sstartsj[i];
5408       for (j=0; j<nrows; j++) {
5409         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5410         for (ll=0; ll<sbs; ll++) {
5411           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5412           for (l=0; l<ncols; l++) {
5413             *bufJ++ = cols[l];
5414           }
5415           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5416         }
5417       }
5418       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5419     }
5420 
5421     /* recvs and sends of j-array are completed */
5422     i = nrecvs;
5423     while (i--) {
5424       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5425     }
5426     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5427   } else if (scall == MAT_REUSE_MATRIX) {
5428     sstartsj = *startsj_s;
5429     rstartsj = *startsj_r;
5430     bufa     = *bufa_ptr;
5431     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5432     b_otha   = b_oth->a;
5433   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5434 
5435   /* a-array */
5436   /*---------*/
5437   /*  post receives of a-array */
5438   for (i=0; i<nrecvs; i++) {
5439     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5440     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5441   }
5442 
5443   /* pack the outgoing message a-array */
5444   k = 0;
5445   for (i=0; i<nsends; i++) {
5446     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5447     bufA  = bufa+sstartsj[i];
5448     for (j=0; j<nrows; j++) {
5449       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5450       for (ll=0; ll<sbs; ll++) {
5451         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5452         for (l=0; l<ncols; l++) {
5453           *bufA++ = vals[l];
5454         }
5455         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5456       }
5457     }
5458     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5459   }
5460   /* recvs and sends of a-array are completed */
5461   i = nrecvs;
5462   while (i--) {
5463     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5464   }
5465   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5466   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5467 
5468   if (scall == MAT_INITIAL_MATRIX) {
5469     /* put together the new matrix */
5470     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5471 
5472     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5473     /* Since these are PETSc arrays, change flags to free them as necessary. */
5474     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5475     b_oth->free_a  = PETSC_TRUE;
5476     b_oth->free_ij = PETSC_TRUE;
5477     b_oth->nonew   = 0;
5478 
5479     ierr = PetscFree(bufj);CHKERRQ(ierr);
5480     if (!startsj_s || !bufa_ptr) {
5481       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5482       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5483     } else {
5484       *startsj_s = sstartsj;
5485       *startsj_r = rstartsj;
5486       *bufa_ptr  = bufa;
5487     }
5488   }
5489   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5490   PetscFunctionReturn(0);
5491 }
5492 
5493 #undef __FUNCT__
5494 #define __FUNCT__ "MatGetCommunicationStructs"
5495 /*@C
5496   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5497 
5498   Not Collective
5499 
5500   Input Parameter:
5501 . A - The matrix in mpiaij format
5502 
5503   Output Parameters:
5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5505 . colmap - A map from global column index to local index into lvec
5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5507 
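  Example usage (a minimal sketch; A is an assembled MPIAIJ matrix, error checking abbreviated; the type of colmap depends on whether PETSc was configured with ctable support):
.vb
   Vec        lvec;
   VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
   PetscTable colmap;
#else
   PetscInt   *colmap;
#endif
   MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
  The returned objects are owned by the matrix and must not be destroyed by the caller.
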
5508   Level: developer
5509 
5510 @*/
5511 #if defined(PETSC_USE_CTABLE)
5512 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5513 #else
5514 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5515 #endif
5516 {
5517   Mat_MPIAIJ *a;
5518 
5519   PetscFunctionBegin;
5520   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5521   PetscValidPointer(lvec, 2);
5522   PetscValidPointer(colmap, 3);
5523   PetscValidPointer(multScatter, 4);
5524   a = (Mat_MPIAIJ*) A->data;
5525   if (lvec) *lvec = a->lvec;
5526   if (colmap) *colmap = a->colmap;
5527   if (multScatter) *multScatter = a->Mvctx;
5528   PetscFunctionReturn(0);
5529 }
5530 
5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5533 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5534 
5535 #undef __FUNCT__
5536 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5537 /*
5538     Computes (B'*A')' since computing B*A directly is untenable
5539 
5540                n                       p                          p
5541         (              )       (              )         (                  )
5542       m (      A       )  *  n (       B      )   =   m (         C        )
5543         (              )       (              )         (                  )
5544 
5545 */
5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5547 {
5548   PetscErrorCode ierr;
5549   Mat            At,Bt,Ct;
5550 
5551   PetscFunctionBegin;
5552   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5553   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5554   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5555   ierr = MatDestroy(&At);CHKERRQ(ierr);
5556   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5557   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5558   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 #undef __FUNCT__
5563 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5564 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5565 {
5566   PetscErrorCode ierr;
5567   PetscInt       m=A->rmap->n,n=B->cmap->n;
5568   Mat            Cmat;
5569 
5570   PetscFunctionBegin;
5571   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5572   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5573   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5574   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5575   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5576   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5577   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5578   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5579 
5580   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5581 
5582   *C = Cmat;
5583   PetscFunctionReturn(0);
5584 }
5585 
5586 /* ----------------------------------------------------------------*/
5587 #undef __FUNCT__
5588 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5589 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5590 {
5591   PetscErrorCode ierr;
5592 
5593   PetscFunctionBegin;
5594   if (scall == MAT_INITIAL_MATRIX) {
5595     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5596     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5597     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5598   }
5599   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5600   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5601   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5602   PetscFunctionReturn(0);
5603 }
5604 
5605 #if defined(PETSC_HAVE_MUMPS)
5606 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5607 #endif
5608 #if defined(PETSC_HAVE_PASTIX)
5609 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5610 #endif
5611 #if defined(PETSC_HAVE_SUPERLU_DIST)
5612 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5613 #endif
5614 #if defined(PETSC_HAVE_CLIQUE)
5615 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5616 #endif
5617 
5618 /*MC
5619    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5620 
5621    Options Database Keys:
5622 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5623 
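  Example usage (a minimal sketch; the global sizes M and N and the preallocation values are illustrative, error checking abbreviated):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
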
5624   Level: beginner
5625 
5626 .seealso: MatCreateAIJ()
5627 M*/
5628 
5629 #undef __FUNCT__
5630 #define __FUNCT__ "MatCreate_MPIAIJ"
5631 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5632 {
5633   Mat_MPIAIJ     *b;
5634   PetscErrorCode ierr;
5635   PetscMPIInt    size;
5636 
5637   PetscFunctionBegin;
5638   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5639 
5640   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5641   B->data       = (void*)b;
5642   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5643   B->assembled  = PETSC_FALSE;
5644   B->insertmode = NOT_SET_VALUES;
5645   b->size       = size;
5646 
5647   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5648 
5649   /* build cache for off array entries formed */
5650   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5651 
5652   b->donotstash  = PETSC_FALSE;
5653   b->colmap      = 0;
5654   b->garray      = 0;
5655   b->roworiented = PETSC_TRUE;
5656 
5657   /* stuff used for matrix vector multiply */
5658   b->lvec  = NULL;
5659   b->Mvctx = NULL;
5660 
5661   /* stuff for MatGetRow() */
5662   b->rowindices   = 0;
5663   b->rowvalues    = 0;
5664   b->getrowactive = PETSC_FALSE;
5665 
5666   /* flexible pointer used in CUSP/CUSPARSE classes */
5667   b->spptr = NULL;
5668 
5669 #if defined(PETSC_HAVE_MUMPS)
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5671 #endif
5672 #if defined(PETSC_HAVE_PASTIX)
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5674 #endif
5675 #if defined(PETSC_HAVE_SUPERLU_DIST)
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5677 #endif
5678 #if defined(PETSC_HAVE_CLIQUE)
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5680 #endif
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5694   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5695   PetscFunctionReturn(0);
5696 }
5697 
5698 #undef __FUNCT__
5699 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5700 /*@
5701      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5702          and "off-diagonal" part of the matrix in CSR format.
5703 
5704    Collective on MPI_Comm
5705 
5706    Input Parameters:
5707 +  comm - MPI communicator
5708 .  m - number of local rows (Cannot be PETSC_DECIDE)
5709 .  n - This value should be the same as the local size used in creating the
5710        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5711        calculated if N is given). For square matrices n is almost always m.
5712 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5713 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5714 .   i - row indices for "diagonal" portion of matrix
5715 .   j - column indices for "diagonal" portion, which must be local (i.e. relative to the first column of the diagonal portion)
5716 .   a - matrix values for "diagonal" portion
5717 .   oi - row indices for "off-diagonal" portion of matrix
5718 .   oj - column indices for "off-diagonal" portion, which must be global (i.e. column indices in the complete matrix)
5719 -   oa - matrix values for "off-diagonal" portion
5720 
5721    Output Parameter:
5722 .   mat - the matrix
5723 
5724    Level: advanced
5725 
5726    Notes:
5727        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5728        must free the arrays once the matrix has been destroyed and not before.
5729 
5730        The i and j indices are 0 based
5731 
5732        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5733 
5734        This sets local rows and cannot be used to set off-processor values.
5735 
5736        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5737        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5738        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5739        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5740        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5741        communication if it is known that only local entries will be set.
5742 
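       Example usage (a minimal sketch; m, n and the CSR arrays i, j, a, oi, oj, oa are assumed to have been built by the caller
       in the split format described above, error checking abbreviated):
.vb
   Mat A;
   MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
   ... use A; the arrays must remain valid until after A is destroyed ...
   MatDestroy(&A);
.ve
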
5743 .keywords: matrix, aij, compressed row, sparse, parallel
5744 
5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5746           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5747 @*/
5748 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5749 {
5750   PetscErrorCode ierr;
5751   Mat_MPIAIJ     *maij;
5752 
5753   PetscFunctionBegin;
5754   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5755   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5756   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5757   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5758   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5759   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5760   maij = (Mat_MPIAIJ*) (*mat)->data;
5761 
5762   (*mat)->preallocated = PETSC_TRUE;
5763 
5764   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5765   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5766 
5767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5768   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5769 
5770   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774 
5775   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5777   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5778   PetscFunctionReturn(0);
5779 }
5780 
5781 /*
5782     Special version for direct calls from Fortran
5783 */
5784 #include <petsc-private/fortranimpl.h>
5785 
5786 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5787 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5788 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5789 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5790 #endif
5791 
5792 /* Change these macros so can be used in void function */
5793 #undef CHKERRQ
5794 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5795 #undef SETERRQ2
5796 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5797 #undef SETERRQ3
5798 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5799 #undef SETERRQ
5800 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5801 
5802 #undef __FUNCT__
5803 #define __FUNCT__ "matsetvaluesmpiaij_"
5804 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5805 {
5806   Mat            mat  = *mmat;
5807   PetscInt       m    = *mm, n = *mn;
5808   InsertMode     addv = *maddv;
5809   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5810   PetscScalar    value;
5811   PetscErrorCode ierr;
5812 
5813   MatCheckPreallocated(mat,1);
5814   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5815 
5816 #if defined(PETSC_USE_DEBUG)
5817   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5818 #endif
5819   {
5820     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5821     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5822     PetscBool roworiented = aij->roworiented;
5823 
5824     /* Some Variables required in the macro */
5825     Mat        A                 = aij->A;
5826     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5827     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5828     MatScalar  *aa               = a->a;
5829     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5830     Mat        B                 = aij->B;
5831     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5832     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5833     MatScalar  *ba               = b->a;
5834 
5835     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5836     PetscInt  nonew = a->nonew;
5837     MatScalar *ap1,*ap2;
5838 
5839     PetscFunctionBegin;
5840     for (i=0; i<m; i++) {
5841       if (im[i] < 0) continue;
5842 #if defined(PETSC_USE_DEBUG)
5843       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5844 #endif
5845       if (im[i] >= rstart && im[i] < rend) {
5846         row      = im[i] - rstart;
5847         lastcol1 = -1;
5848         rp1      = aj + ai[row];
5849         ap1      = aa + ai[row];
5850         rmax1    = aimax[row];
5851         nrow1    = ailen[row];
5852         low1     = 0;
5853         high1    = nrow1;
5854         lastcol2 = -1;
5855         rp2      = bj + bi[row];
5856         ap2      = ba + bi[row];
5857         rmax2    = bimax[row];
5858         nrow2    = bilen[row];
5859         low2     = 0;
5860         high2    = nrow2;
5861 
5862         for (j=0; j<n; j++) {
5863           if (roworiented) value = v[i*n+j];
5864           else value = v[i+j*m];
5865           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5866           if (in[j] >= cstart && in[j] < cend) {
5867             col = in[j] - cstart;
5868             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5869           } else if (in[j] < 0) continue;
5870 #if defined(PETSC_USE_DEBUG)
5871           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5872 #endif
5873           else {
5874             if (mat->was_assembled) {
5875               if (!aij->colmap) {
5876                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5877               }
5878 #if defined(PETSC_USE_CTABLE)
5879               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5880               col--;
5881 #else
5882               col = aij->colmap[in[j]] - 1;
5883 #endif
5884               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5885                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5886                 col  =  in[j];
5887                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5888                 B     = aij->B;
5889                 b     = (Mat_SeqAIJ*)B->data;
5890                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5891                 rp2   = bj + bi[row];
5892                 ap2   = ba + bi[row];
5893                 rmax2 = bimax[row];
5894                 nrow2 = bilen[row];
5895                 low2  = 0;
5896                 high2 = nrow2;
5897                 bm    = aij->B->rmap->n;
5898                 ba    = b->a;
5899               }
5900             } else col = in[j];
5901             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5902           }
5903         }
5904       } else if (!aij->donotstash) {
5905         if (roworiented) {
5906           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5907         } else {
5908           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5909         }
5910       }
5911     }
5912   }
5913   PetscFunctionReturnVoid();
5914 }
5915 
5916