xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b4617e5d49fdcc233b979009a03b98f0ddcb9dd0)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511   PetscInt    nzlocal,nsends,nrecvs;
2512   PetscMPIInt *send_rank,*recv_rank;
2513   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514   PetscScalar *sbuf_a,**rbuf_a;
2515   PetscErrorCode (*Destroy)(Mat);
2516 } Mat_Redundant;
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521 {
2522   PetscErrorCode ierr;
2523   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2524   PetscInt       i;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2528   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2530   for (i=0; i<redund->nrecvs; i++) {
2531     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2532     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2533   }
2534   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2535   ierr = PetscFree(redund);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatDestroy_MatRedundant"
2541 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542 {
2543   PetscErrorCode ierr;
2544   PetscContainer container;
2545   Mat_Redundant  *redund=NULL;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2549   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2550   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2551 
2552   A->ops->destroy = redund->Destroy;
2553 
2554   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2555   if (A->ops->destroy) {
2556     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2564 {
2565   PetscMPIInt    rank,size;
2566   MPI_Comm       comm,subcomm=psubcomm->comm;
2567   PetscErrorCode ierr;
2568   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,nsubcomm=psubcomm->n;
2569   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2570   PetscInt       *rowrange = mat->rmap->range;
2571   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2572   Mat            A = aij->A,B=aij->B,C=*matredundant;
2573   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574   PetscScalar    *sbuf_a;
2575   PetscInt       nzlocal=a->nz+b->nz;
2576   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N,m,n;
2578   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579   MatScalar      *aworkA,*aworkB;
2580   PetscScalar    *vals;
2581   PetscMPIInt    tag1,tag2,tag3,imdex;
2582   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584   MPI_Status     recv_status,*send_status;
2585   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586   PetscInt       **rbuf_j=NULL;
2587   PetscScalar    **rbuf_a=NULL;
2588   Mat_Redundant  *redund =NULL;
2589   PetscContainer container;
2590   PetscInt       rstart_sub,rend_sub,mloc_sub;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
2594   if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N);
2595 
2596   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2597   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2599 
2600   if (reuse == MAT_REUSE_MATRIX) {
2601     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2602     ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr);
2603     if (m != mat->rmap->n || n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2604     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2605     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2606     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2607     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2608 
2609     nsends    = redund->nsends;
2610     nrecvs    = redund->nrecvs;
2611     send_rank = redund->send_rank;
2612     recv_rank = redund->recv_rank;
2613     sbuf_nz   = redund->sbuf_nz;
2614     rbuf_nz   = redund->rbuf_nz;
2615     sbuf_j    = redund->sbuf_j;
2616     sbuf_a    = redund->sbuf_a;
2617     rbuf_j    = redund->rbuf_j;
2618     rbuf_a    = redund->rbuf_a;
2619   }
2620 
2621   if (reuse == MAT_INITIAL_MATRIX) {
2622     PetscMPIInt subrank,subsize;
2623     PetscInt    nleftover,np_subcomm;
2624 
2625     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2626     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2627 
2628     /* get local size of redundant matrix */
2629     const PetscInt *range;
2630     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2631     rstart_sub = range[nsubcomm*subrank];
2632     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2633       rend_sub = range[nsubcomm*(subrank+1)];
2634     } else {
2635       rend_sub = mat->rmap->N;
2636     }
2637 
2638     mloc_sub = rend_sub - rstart_sub;
2639     /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */
2640 
2641     /* get the destination processors' id send_rank, nsends and nrecvs */
2642     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2643 
2644     np_subcomm = size/nsubcomm;
2645     nleftover  = size - nsubcomm*np_subcomm;
2646 
2647     nsends = 0; nrecvs = 0;
2648     for (i=0; i<size; i++) {
2649       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2650         send_rank[nsends]   = i; nsends++;
2651         recv_rank[nrecvs++] = i;
2652       }
2653     }
2654     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2655       i = size-nleftover-1;
2656       j = 0;
2657       while (j < nsubcomm - nleftover) {
2658         send_rank[nsends++] = i;
2659         i--; j++;
2660       }
2661     }
2662 
2663     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2664       for (i=0; i<nleftover; i++) {
2665         recv_rank[nrecvs++] = size-nleftover+i;
2666       }
2667     }
2668 
2669     /* allocate sbuf_j, sbuf_a */
2670     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2671     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2672     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2673   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2674 
2675   /* copy mat's local entries into the buffers */
2676   if (reuse == MAT_INITIAL_MATRIX) {
2677     rownz_max = 0;
2678     rptr      = sbuf_j;
2679     cols      = sbuf_j + rend-rstart + 1;
2680     vals      = sbuf_a;
2681     rptr[0]   = 0;
2682     for (i=0; i<rend-rstart; i++) {
2683       row    = i + rstart;
2684       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2685       ncols  = nzA + nzB;
2686       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2687       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2688       /* load the column indices for this row into cols */
2689       lwrite = 0;
2690       for (l=0; l<nzB; l++) {
2691         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2692           vals[lwrite]   = aworkB[l];
2693           cols[lwrite++] = ctmp;
2694         }
2695       }
2696       for (l=0; l<nzA; l++) {
2697         vals[lwrite]   = aworkA[l];
2698         cols[lwrite++] = cstart + cworkA[l];
2699       }
2700       for (l=0; l<nzB; l++) {
2701         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2702           vals[lwrite]   = aworkB[l];
2703           cols[lwrite++] = ctmp;
2704         }
2705       }
2706       vals     += ncols;
2707       cols     += ncols;
2708       rptr[i+1] = rptr[i] + ncols;
2709       if (rownz_max < ncols) rownz_max = ncols;
2710     }
2711     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2712   } else { /* only copy matrix values into sbuf_a */
2713     rptr    = sbuf_j;
2714     vals    = sbuf_a;
2715     rptr[0] = 0;
2716     for (i=0; i<rend-rstart; i++) {
2717       row    = i + rstart;
2718       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2719       ncols  = nzA + nzB;
2720       cworkB = b->j + b->i[i];
2721       aworkA = a->a + a->i[i];
2722       aworkB = b->a + b->i[i];
2723       lwrite = 0;
2724       for (l=0; l<nzB; l++) {
2725         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2726       }
2727       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2728       for (l=0; l<nzB; l++) {
2729         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2730       }
2731       vals     += ncols;
2732       rptr[i+1] = rptr[i] + ncols;
2733     }
2734   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2735 
2736   /* send nzlocal to others, and recv other's nzlocal */
2737   /*--------------------------------------------------*/
2738   if (reuse == MAT_INITIAL_MATRIX) {
2739     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2740 
2741     s_waits2 = s_waits3 + nsends;
2742     s_waits1 = s_waits2 + nsends;
2743     r_waits1 = s_waits1 + nsends;
2744     r_waits2 = r_waits1 + nrecvs;
2745     r_waits3 = r_waits2 + nrecvs;
2746   } else {
2747     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2748 
2749     r_waits3 = s_waits3 + nsends;
2750   }
2751 
2752   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2753   if (reuse == MAT_INITIAL_MATRIX) {
2754     /* get new tags to keep the communication clean */
2755     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2756     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2757     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2758 
2759     /* post receives of other's nzlocal */
2760     for (i=0; i<nrecvs; i++) {
2761       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2762     }
2763     /* send nzlocal to others */
2764     for (i=0; i<nsends; i++) {
2765       sbuf_nz[i] = nzlocal;
2766       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2767     }
2768     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2769     count = nrecvs;
2770     while (count) {
2771       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2772 
2773       recv_rank[imdex] = recv_status.MPI_SOURCE;
2774       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2775       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2776 
2777       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2778 
2779       rbuf_nz[imdex] += i + 2;
2780 
2781       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2782       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2783       count--;
2784     }
2785     /* wait on sends of nzlocal */
2786     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2787     /* send mat->i,j to others, and recv from other's */
2788     /*------------------------------------------------*/
2789     for (i=0; i<nsends; i++) {
2790       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2791       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2792     }
2793     /* wait on receives of mat->i,j */
2794     /*------------------------------*/
2795     count = nrecvs;
2796     while (count) {
2797       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2798       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2799       count--;
2800     }
2801     /* wait on sends of mat->i,j */
2802     /*---------------------------*/
2803     if (nsends) {
2804       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2805     }
2806   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2807 
2808   /* post receives, send and receive mat->a */
2809   /*----------------------------------------*/
2810   for (imdex=0; imdex<nrecvs; imdex++) {
2811     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2812   }
2813   for (i=0; i<nsends; i++) {
2814     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2815   }
2816   count = nrecvs;
2817   while (count) {
2818     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2819     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2820     count--;
2821   }
2822   if (nsends) {
2823     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2824   }
2825 
2826   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2827 
2828   /* create redundant matrix */
2829   /*-------------------------*/
2830   if (reuse == MAT_INITIAL_MATRIX) {
2831     /* compute rownz_max for preallocation */
2832     for (imdex=0; imdex<nrecvs; imdex++) {
2833       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2834       rptr = rbuf_j[imdex];
2835       for (i=0; i<j; i++) {
2836         ncols = rptr[i+1] - rptr[i];
2837         if (rownz_max < ncols) rownz_max = ncols;
2838       }
2839     }
2840 
2841     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2842     ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2843     /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */
2844     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2845     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2846     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2847     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2848   } else {
2849     C = *matredundant;
2850   }
2851 
2852   /* insert local matrix entries */
2853   rptr = sbuf_j;
2854   cols = sbuf_j + rend-rstart + 1;
2855   vals = sbuf_a;
2856   for (i=0; i<rend-rstart; i++) {
2857     row   = i + rstart;
2858     ncols = rptr[i+1] - rptr[i];
2859     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2860     vals += ncols;
2861     cols += ncols;
2862   }
2863   /* insert received matrix entries */
2864   for (imdex=0; imdex<nrecvs; imdex++) {
2865     rstart = rowrange[recv_rank[imdex]];
2866     rend   = rowrange[recv_rank[imdex]+1];
2867     rptr   = rbuf_j[imdex];
2868     cols   = rbuf_j[imdex] + rend-rstart + 1;
2869     vals   = rbuf_a[imdex];
2870     for (i=0; i<rend-rstart; i++) {
2871       row   = i + rstart;
2872       ncols = rptr[i+1] - rptr[i];
2873       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2874       vals += ncols;
2875       cols += ncols;
2876     }
2877   }
2878   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880 
2881   if (reuse == MAT_INITIAL_MATRIX) {
2882     PetscContainer container;
2883     *matredundant = C;
2884     /* create a supporting struct and attach it to C for reuse */
2885     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2886     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2887     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2888     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2889     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2890     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2891 
2892     redund->nzlocal   = nzlocal;
2893     redund->nsends    = nsends;
2894     redund->nrecvs    = nrecvs;
2895     redund->send_rank = send_rank;
2896     redund->recv_rank = recv_rank;
2897     redund->sbuf_nz   = sbuf_nz;
2898     redund->rbuf_nz   = rbuf_nz;
2899     redund->sbuf_j    = sbuf_j;
2900     redund->sbuf_a    = sbuf_a;
2901     redund->rbuf_j    = rbuf_j;
2902     redund->rbuf_a    = rbuf_a;
2903 
2904     redund->Destroy = C->ops->destroy;
2905     C->ops->destroy = MatDestroy_MatRedundant;
2906   }
2907   PetscFunctionReturn(0);
2908 }
2909 
2910 #undef __FUNCT__
2911 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2912 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2913 {
2914   PetscMPIInt    rank,size;
2915   MPI_Comm       comm;
2916   PetscErrorCode ierr;
2917   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2918   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2919   PetscInt       *rowrange = mat->rmap->range;
2920   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2921   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2922   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2923   PetscScalar    *sbuf_a;
2924   PetscInt       nzlocal=a->nz+b->nz;
2925   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2926   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2927   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2928   MatScalar      *aworkA,*aworkB;
2929   PetscScalar    *vals;
2930   PetscMPIInt    tag1,tag2,tag3,imdex;
2931   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2932   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2933   MPI_Status     recv_status,*send_status;
2934   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2935   PetscInt       **rbuf_j=NULL;
2936   PetscScalar    **rbuf_a=NULL;
2937   Mat_Redundant  *redund =NULL;
2938   PetscContainer container;
2939 
2940   PetscFunctionBegin;
2941   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2942   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2944 
2945   if (reuse == MAT_REUSE_MATRIX) {
2946     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2947     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2948     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2949     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2950     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2951     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2952     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2953     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2954 
2955     nsends    = redund->nsends;
2956     nrecvs    = redund->nrecvs;
2957     send_rank = redund->send_rank;
2958     recv_rank = redund->recv_rank;
2959     sbuf_nz   = redund->sbuf_nz;
2960     rbuf_nz   = redund->rbuf_nz;
2961     sbuf_j    = redund->sbuf_j;
2962     sbuf_a    = redund->sbuf_a;
2963     rbuf_j    = redund->rbuf_j;
2964     rbuf_a    = redund->rbuf_a;
2965   }
2966 
2967   if (reuse == MAT_INITIAL_MATRIX) {
2968     PetscMPIInt subrank,subsize;
2969     PetscInt    nleftover,np_subcomm;
2970     /* get the destination processors' id send_rank, nsends and nrecvs */
2971     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2972     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2973     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2974 
2975     np_subcomm = size/nsubcomm;
2976     nleftover  = size - nsubcomm*np_subcomm;
2977 
2978     nsends = 0; nrecvs = 0;
2979     for (i=0; i<size; i++) { /* i=rank*/
2980       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2981         send_rank[nsends]   = i; nsends++;
2982         recv_rank[nrecvs++] = i;
2983       }
2984     }
2985     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2986       i = size-nleftover-1;
2987       j = 0;
2988       while (j < nsubcomm - nleftover) {
2989         send_rank[nsends++] = i;
2990         i--; j++;
2991       }
2992     }
2993 
2994     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2995       for (i=0; i<nleftover; i++) {
2996         recv_rank[nrecvs++] = size-nleftover+i;
2997       }
2998     }
2999 
3000     /* allocate sbuf_j, sbuf_a */
3001     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
3002     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
3003     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
3004   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3005 
3006   /* copy mat's local entries into the buffers */
3007   if (reuse == MAT_INITIAL_MATRIX) {
3008     rownz_max = 0;
3009     rptr      = sbuf_j;
3010     cols      = sbuf_j + rend-rstart + 1;
3011     vals      = sbuf_a;
3012     rptr[0]   = 0;
3013     for (i=0; i<rend-rstart; i++) {
3014       row    = i + rstart;
3015       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
3016       ncols  = nzA + nzB;
3017       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
3018       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
3019       /* load the column indices for this row into cols */
3020       lwrite = 0;
3021       for (l=0; l<nzB; l++) {
3022         if ((ctmp = bmap[cworkB[l]]) < cstart) {
3023           vals[lwrite]   = aworkB[l];
3024           cols[lwrite++] = ctmp;
3025         }
3026       }
3027       for (l=0; l<nzA; l++) {
3028         vals[lwrite]   = aworkA[l];
3029         cols[lwrite++] = cstart + cworkA[l];
3030       }
3031       for (l=0; l<nzB; l++) {
3032         if ((ctmp = bmap[cworkB[l]]) >= cend) {
3033           vals[lwrite]   = aworkB[l];
3034           cols[lwrite++] = ctmp;
3035         }
3036       }
3037       vals     += ncols;
3038       cols     += ncols;
3039       rptr[i+1] = rptr[i] + ncols;
3040       if (rownz_max < ncols) rownz_max = ncols;
3041     }
3042     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
3043   } else { /* only copy matrix values into sbuf_a */
3044     rptr    = sbuf_j;
3045     vals    = sbuf_a;
3046     rptr[0] = 0;
3047     for (i=0; i<rend-rstart; i++) {
3048       row    = i + rstart;
3049       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
3050       ncols  = nzA + nzB;
3051       cworkB = b->j + b->i[i];
3052       aworkA = a->a + a->i[i];
3053       aworkB = b->a + b->i[i];
3054       lwrite = 0;
3055       for (l=0; l<nzB; l++) {
3056         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
3057       }
3058       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
3059       for (l=0; l<nzB; l++) {
3060         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
3061       }
3062       vals     += ncols;
3063       rptr[i+1] = rptr[i] + ncols;
3064     }
3065   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3066 
3067   /* send nzlocal to others, and recv other's nzlocal */
3068   /*--------------------------------------------------*/
3069   if (reuse == MAT_INITIAL_MATRIX) {
3070     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
3071 
3072     s_waits2 = s_waits3 + nsends;
3073     s_waits1 = s_waits2 + nsends;
3074     r_waits1 = s_waits1 + nsends;
3075     r_waits2 = r_waits1 + nrecvs;
3076     r_waits3 = r_waits2 + nrecvs;
3077   } else {
3078     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
3079 
3080     r_waits3 = s_waits3 + nsends;
3081   }
3082 
3083   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
3084   if (reuse == MAT_INITIAL_MATRIX) {
3085     /* get new tags to keep the communication clean */
3086     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
3087     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
3088     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
3089 
3090     /* post receives of other's nzlocal */
3091     for (i=0; i<nrecvs; i++) {
3092       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
3093     }
3094     /* send nzlocal to others */
3095     for (i=0; i<nsends; i++) {
3096       sbuf_nz[i] = nzlocal;
3097       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
3098     }
3099     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
3100     count = nrecvs;
3101     while (count) {
3102       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
3103 
3104       recv_rank[imdex] = recv_status.MPI_SOURCE;
3105       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
3106       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
3107 
3108       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
3109 
3110       rbuf_nz[imdex] += i + 2;
3111 
3112       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
3113       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
3114       count--;
3115     }
3116     /* wait on sends of nzlocal */
3117     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
3118     /* send mat->i,j to others, and recv from other's */
3119     /*------------------------------------------------*/
3120     for (i=0; i<nsends; i++) {
3121       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
3122       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
3123     }
3124     /* wait on receives of mat->i,j */
3125     /*------------------------------*/
3126     count = nrecvs;
3127     while (count) {
3128       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
3129       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3130       count--;
3131     }
3132     /* wait on sends of mat->i,j */
3133     /*---------------------------*/
3134     if (nsends) {
3135       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
3136     }
3137   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3138 
3139   /* post receives, send and receive mat->a */
3140   /*----------------------------------------*/
3141   for (imdex=0; imdex<nrecvs; imdex++) {
3142     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
3143   }
3144   for (i=0; i<nsends; i++) {
3145     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
3146   }
3147   count = nrecvs;
3148   while (count) {
3149     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
3150     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3151     count--;
3152   }
3153   if (nsends) {
3154     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
3155   }
3156 
3157   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
3158 
3159   /* create redundant matrix */
3160   /*-------------------------*/
3161   if (reuse == MAT_INITIAL_MATRIX) {
3162     /* compute rownz_max for preallocation */
3163     for (imdex=0; imdex<nrecvs; imdex++) {
3164       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
3165       rptr = rbuf_j[imdex];
3166       for (i=0; i<j; i++) {
3167         ncols = rptr[i+1] - rptr[i];
3168         if (rownz_max < ncols) rownz_max = ncols;
3169       }
3170     }
3171 
3172     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
3173     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3174     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
3175     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
3176     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
3177     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
3178   } else {
3179     C = *matredundant;
3180   }
3181 
3182   /* insert local matrix entries */
3183   rptr = sbuf_j;
3184   cols = sbuf_j + rend-rstart + 1;
3185   vals = sbuf_a;
3186   for (i=0; i<rend-rstart; i++) {
3187     row   = i + rstart;
3188     ncols = rptr[i+1] - rptr[i];
3189     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3190     vals += ncols;
3191     cols += ncols;
3192   }
3193   /* insert received matrix entries */
3194   for (imdex=0; imdex<nrecvs; imdex++) {
3195     rstart = rowrange[recv_rank[imdex]];
3196     rend   = rowrange[recv_rank[imdex]+1];
3197     rptr   = rbuf_j[imdex];
3198     cols   = rbuf_j[imdex] + rend-rstart + 1;
3199     vals   = rbuf_a[imdex];
3200     for (i=0; i<rend-rstart; i++) {
3201       row   = i + rstart;
3202       ncols = rptr[i+1] - rptr[i];
3203       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3204       vals += ncols;
3205       cols += ncols;
3206     }
3207   }
3208   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3210   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
3211   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
3212   if (reuse == MAT_INITIAL_MATRIX) {
3213     PetscContainer container;
3214     *matredundant = C;
3215     /* create a supporting struct and attach it to C for reuse */
3216     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3217     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3218     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
3219     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
3220     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
3221     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
3222 
3223     redund->nzlocal   = nzlocal;
3224     redund->nsends    = nsends;
3225     redund->nrecvs    = nrecvs;
3226     redund->send_rank = send_rank;
3227     redund->recv_rank = recv_rank;
3228     redund->sbuf_nz   = sbuf_nz;
3229     redund->rbuf_nz   = rbuf_nz;
3230     redund->sbuf_j    = sbuf_j;
3231     redund->sbuf_a    = sbuf_a;
3232     redund->rbuf_j    = rbuf_j;
3233     redund->rbuf_a    = rbuf_a;
3234 
3235     redund->Destroy = C->ops->destroy;
3236     C->ops->destroy = MatDestroy_MatRedundant;
3237   }
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3243 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3244 {
3245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3246   PetscErrorCode ierr;
3247   PetscInt       i,*idxb = 0;
3248   PetscScalar    *va,*vb;
3249   Vec            vtmp;
3250 
3251   PetscFunctionBegin;
3252   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3253   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3254   if (idx) {
3255     for (i=0; i<A->rmap->n; i++) {
3256       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3257     }
3258   }
3259 
3260   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3261   if (idx) {
3262     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3263   }
3264   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3265   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3266 
3267   for (i=0; i<A->rmap->n; i++) {
3268     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3269       va[i] = vb[i];
3270       if (idx) idx[i] = a->garray[idxb[i]];
3271     }
3272   }
3273 
3274   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3275   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3276   ierr = PetscFree(idxb);CHKERRQ(ierr);
3277   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3278   PetscFunctionReturn(0);
3279 }
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3283 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3284 {
3285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3286   PetscErrorCode ierr;
3287   PetscInt       i,*idxb = 0;
3288   PetscScalar    *va,*vb;
3289   Vec            vtmp;
3290 
3291   PetscFunctionBegin;
3292   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3293   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3294   if (idx) {
3295     for (i=0; i<A->cmap->n; i++) {
3296       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3297     }
3298   }
3299 
3300   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3301   if (idx) {
3302     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3303   }
3304   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3305   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3306 
3307   for (i=0; i<A->rmap->n; i++) {
3308     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3309       va[i] = vb[i];
3310       if (idx) idx[i] = a->garray[idxb[i]];
3311     }
3312   }
3313 
3314   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3315   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3316   ierr = PetscFree(idxb);CHKERRQ(ierr);
3317   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3318   PetscFunctionReturn(0);
3319 }
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3324 {
3325   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3326   PetscInt       n      = A->rmap->n;
3327   PetscInt       cstart = A->cmap->rstart;
3328   PetscInt       *cmap  = mat->garray;
3329   PetscInt       *diagIdx, *offdiagIdx;
3330   Vec            diagV, offdiagV;
3331   PetscScalar    *a, *diagA, *offdiagA;
3332   PetscInt       r;
3333   PetscErrorCode ierr;
3334 
3335   PetscFunctionBegin;
3336   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3337   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3338   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3339   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3340   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3341   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3342   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3343   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3344   for (r = 0; r < n; ++r) {
3345     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3346       a[r]   = diagA[r];
3347       idx[r] = cstart + diagIdx[r];
3348     } else {
3349       a[r]   = offdiagA[r];
3350       idx[r] = cmap[offdiagIdx[r]];
3351     }
3352   }
3353   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3354   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3355   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3356   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3357   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3358   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3364 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3365 {
3366   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3367   PetscInt       n      = A->rmap->n;
3368   PetscInt       cstart = A->cmap->rstart;
3369   PetscInt       *cmap  = mat->garray;
3370   PetscInt       *diagIdx, *offdiagIdx;
3371   Vec            diagV, offdiagV;
3372   PetscScalar    *a, *diagA, *offdiagA;
3373   PetscInt       r;
3374   PetscErrorCode ierr;
3375 
3376   PetscFunctionBegin;
3377   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3378   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3379   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3380   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3381   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3382   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3383   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3384   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3385   for (r = 0; r < n; ++r) {
3386     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3387       a[r]   = diagA[r];
3388       idx[r] = cstart + diagIdx[r];
3389     } else {
3390       a[r]   = offdiagA[r];
3391       idx[r] = cmap[offdiagIdx[r]];
3392     }
3393   }
3394   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3395   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3396   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3397   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3398   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3399   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 #undef __FUNCT__
3404 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3405 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3406 {
3407   PetscErrorCode ierr;
3408   Mat            *dummy;
3409 
3410   PetscFunctionBegin;
3411   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3412   *newmat = *dummy;
3413   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3414   PetscFunctionReturn(0);
3415 }
3416 
3417 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3421 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3422 {
3423   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3424   PetscErrorCode ierr;
3425 
3426   PetscFunctionBegin;
3427   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3433 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3434 {
3435   PetscErrorCode ierr;
3436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3440   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3441   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3442   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 /* -------------------------------------------------------------------*/
3447 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3448                                        MatGetRow_MPIAIJ,
3449                                        MatRestoreRow_MPIAIJ,
3450                                        MatMult_MPIAIJ,
3451                                 /* 4*/ MatMultAdd_MPIAIJ,
3452                                        MatMultTranspose_MPIAIJ,
3453                                        MatMultTransposeAdd_MPIAIJ,
3454 #if defined(PETSC_HAVE_PBGL)
3455                                        MatSolve_MPIAIJ,
3456 #else
3457                                        0,
3458 #endif
3459                                        0,
3460                                        0,
3461                                 /*10*/ 0,
3462                                        0,
3463                                        0,
3464                                        MatSOR_MPIAIJ,
3465                                        MatTranspose_MPIAIJ,
3466                                 /*15*/ MatGetInfo_MPIAIJ,
3467                                        MatEqual_MPIAIJ,
3468                                        MatGetDiagonal_MPIAIJ,
3469                                        MatDiagonalScale_MPIAIJ,
3470                                        MatNorm_MPIAIJ,
3471                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3472                                        MatAssemblyEnd_MPIAIJ,
3473                                        MatSetOption_MPIAIJ,
3474                                        MatZeroEntries_MPIAIJ,
3475                                 /*24*/ MatZeroRows_MPIAIJ,
3476                                        0,
3477 #if defined(PETSC_HAVE_PBGL)
3478                                        0,
3479 #else
3480                                        0,
3481 #endif
3482                                        0,
3483                                        0,
3484                                 /*29*/ MatSetUp_MPIAIJ,
3485 #if defined(PETSC_HAVE_PBGL)
3486                                        0,
3487 #else
3488                                        0,
3489 #endif
3490                                        0,
3491                                        0,
3492                                        0,
3493                                 /*34*/ MatDuplicate_MPIAIJ,
3494                                        0,
3495                                        0,
3496                                        0,
3497                                        0,
3498                                 /*39*/ MatAXPY_MPIAIJ,
3499                                        MatGetSubMatrices_MPIAIJ,
3500                                        MatIncreaseOverlap_MPIAIJ,
3501                                        MatGetValues_MPIAIJ,
3502                                        MatCopy_MPIAIJ,
3503                                 /*44*/ MatGetRowMax_MPIAIJ,
3504                                        MatScale_MPIAIJ,
3505                                        0,
3506                                        0,
3507                                        MatZeroRowsColumns_MPIAIJ,
3508                                 /*49*/ MatSetRandom_MPIAIJ,
3509                                        0,
3510                                        0,
3511                                        0,
3512                                        0,
3513                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3514                                        0,
3515                                        MatSetUnfactored_MPIAIJ,
3516                                        MatPermute_MPIAIJ,
3517                                        0,
3518                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3519                                        MatDestroy_MPIAIJ,
3520                                        MatView_MPIAIJ,
3521                                        0,
3522                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3523                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3524                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3525                                        0,
3526                                        0,
3527                                        0,
3528                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3529                                        MatGetRowMinAbs_MPIAIJ,
3530                                        0,
3531                                        MatSetColoring_MPIAIJ,
3532                                        0,
3533                                        MatSetValuesAdifor_MPIAIJ,
3534                                 /*75*/ MatFDColoringApply_AIJ,
3535                                        0,
3536                                        0,
3537                                        0,
3538                                        MatFindZeroDiagonals_MPIAIJ,
3539                                 /*80*/ 0,
3540                                        0,
3541                                        0,
3542                                 /*83*/ MatLoad_MPIAIJ,
3543                                        0,
3544                                        0,
3545                                        0,
3546                                        0,
3547                                        0,
3548                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3549                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3550                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3551                                        MatPtAP_MPIAIJ_MPIAIJ,
3552                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3553                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3554                                        0,
3555                                        0,
3556                                        0,
3557                                        0,
3558                                 /*99*/ 0,
3559                                        0,
3560                                        0,
3561                                        MatConjugate_MPIAIJ,
3562                                        0,
3563                                 /*104*/MatSetValuesRow_MPIAIJ,
3564                                        MatRealPart_MPIAIJ,
3565                                        MatImaginaryPart_MPIAIJ,
3566                                        0,
3567                                        0,
3568                                 /*109*/0,
3569                                        MatGetRedundantMatrix_MPIAIJ,
3570                                        MatGetRowMin_MPIAIJ,
3571                                        0,
3572                                        0,
3573                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3574                                        0,
3575                                        0,
3576                                        0,
3577                                        0,
3578                                 /*119*/0,
3579                                        0,
3580                                        0,
3581                                        0,
3582                                        MatGetMultiProcBlock_MPIAIJ,
3583                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3584                                        MatGetColumnNorms_MPIAIJ,
3585                                        MatInvertBlockDiagonal_MPIAIJ,
3586                                        0,
3587                                        MatGetSubMatricesParallel_MPIAIJ,
3588                                 /*129*/0,
3589                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3590                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3591                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3592                                        0,
3593                                 /*134*/0,
3594                                        0,
3595                                        0,
3596                                        0,
3597                                        0,
3598                                 /*139*/0,
3599                                        0
3600 };
3601 
3602 /* ----------------------------------------------------------------------------------------*/
3603 
3604 #undef __FUNCT__
3605 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3606 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3607 {
3608   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3609   PetscErrorCode ierr;
3610 
3611   PetscFunctionBegin;
3612   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3613   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3619 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3620 {
3621   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3622   PetscErrorCode ierr;
3623 
3624   PetscFunctionBegin;
3625   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3626   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 #undef __FUNCT__
3631 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3632 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3633 {
3634   Mat_MPIAIJ     *b;
3635   PetscErrorCode ierr;
3636 
3637   PetscFunctionBegin;
3638   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3640   b = (Mat_MPIAIJ*)B->data;
3641 
3642   if (!B->preallocated) {
3643     /* Explicitly create 2 MATSEQAIJ matrices. */
3644     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3645     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3646     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3647     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3648     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3649     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3650     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3651     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3652     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3653     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3654   }
3655 
3656   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3657   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3658   B->preallocated = PETSC_TRUE;
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 #undef __FUNCT__
3663 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3664 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3665 {
3666   Mat            mat;
3667   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3668   PetscErrorCode ierr;
3669 
3670   PetscFunctionBegin;
3671   *newmat = 0;
3672   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3673   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3674   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3675   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3676   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3677   a       = (Mat_MPIAIJ*)mat->data;
3678 
3679   mat->factortype   = matin->factortype;
3680   mat->rmap->bs     = matin->rmap->bs;
3681   mat->cmap->bs     = matin->cmap->bs;
3682   mat->assembled    = PETSC_TRUE;
3683   mat->insertmode   = NOT_SET_VALUES;
3684   mat->preallocated = PETSC_TRUE;
3685 
3686   a->size         = oldmat->size;
3687   a->rank         = oldmat->rank;
3688   a->donotstash   = oldmat->donotstash;
3689   a->roworiented  = oldmat->roworiented;
3690   a->rowindices   = 0;
3691   a->rowvalues    = 0;
3692   a->getrowactive = PETSC_FALSE;
3693 
3694   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3695   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3696 
3697   if (oldmat->colmap) {
3698 #if defined(PETSC_USE_CTABLE)
3699     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3700 #else
3701     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3702     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3703     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3704 #endif
3705   } else a->colmap = 0;
3706   if (oldmat->garray) {
3707     PetscInt len;
3708     len  = oldmat->B->cmap->n;
3709     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3710     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3711     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3712   } else a->garray = 0;
3713 
3714   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3715   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3716   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3717   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3718   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3719   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3720   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3721   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3722   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3723   *newmat = mat;
3724   PetscFunctionReturn(0);
3725 }
3726 
3727 
3728 
3729 #undef __FUNCT__
3730 #define __FUNCT__ "MatLoad_MPIAIJ"
3731 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3732 {
3733   PetscScalar    *vals,*svals;
3734   MPI_Comm       comm;
3735   PetscErrorCode ierr;
3736   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3737   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3738   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3739   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3740   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3741   int            fd;
3742   PetscInt       bs = 1;
3743 
3744   PetscFunctionBegin;
3745   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3747   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3748   if (!rank) {
3749     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3750     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3751     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3752   }
3753 
3754   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3755   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3756   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3757 
3758   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3759 
3760   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3761   M    = header[1]; N = header[2];
3762   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3763   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3764   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3765 
3766   /* If global sizes are set, check if they are consistent with that given in the file */
3767   if (sizesset) {
3768     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3769   }
3770   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3771   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3772 
3773   /* determine ownership of all (block) rows */
3774   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3775   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3776   else m = newMat->rmap->n; /* Set by user */
3777 
3778   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3779   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3780 
3781   /* First process needs enough room for process with most rows */
3782   if (!rank) {
3783     mmax = rowners[1];
3784     for (i=2; i<=size; i++) {
3785       mmax = PetscMax(mmax, rowners[i]);
3786     }
3787   } else mmax = -1;             /* unused, but compilers complain */
3788 
3789   rowners[0] = 0;
3790   for (i=2; i<=size; i++) {
3791     rowners[i] += rowners[i-1];
3792   }
3793   rstart = rowners[rank];
3794   rend   = rowners[rank+1];
3795 
3796   /* distribute row lengths to all processors */
3797   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3798   if (!rank) {
3799     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3800     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3801     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3802     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3803     for (j=0; j<m; j++) {
3804       procsnz[0] += ourlens[j];
3805     }
3806     for (i=1; i<size; i++) {
3807       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3808       /* calculate the number of nonzeros on each processor */
3809       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3810         procsnz[i] += rowlengths[j];
3811       }
3812       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3813     }
3814     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3815   } else {
3816     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3817   }
3818 
3819   if (!rank) {
3820     /* determine max buffer needed and allocate it */
3821     maxnz = 0;
3822     for (i=0; i<size; i++) {
3823       maxnz = PetscMax(maxnz,procsnz[i]);
3824     }
3825     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3826 
3827     /* read in my part of the matrix column indices  */
3828     nz   = procsnz[0];
3829     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3830     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3831 
3832     /* read in every one elses and ship off */
3833     for (i=1; i<size; i++) {
3834       nz   = procsnz[i];
3835       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3836       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3837     }
3838     ierr = PetscFree(cols);CHKERRQ(ierr);
3839   } else {
3840     /* determine buffer space needed for message */
3841     nz = 0;
3842     for (i=0; i<m; i++) {
3843       nz += ourlens[i];
3844     }
3845     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3846 
3847     /* receive message of column indices*/
3848     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3849   }
3850 
3851   /* determine column ownership if matrix is not square */
3852   if (N != M) {
3853     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3854     else n = newMat->cmap->n;
3855     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3856     cstart = cend - n;
3857   } else {
3858     cstart = rstart;
3859     cend   = rend;
3860     n      = cend - cstart;
3861   }
3862 
3863   /* loop over local rows, determining number of off diagonal entries */
3864   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3865   jj   = 0;
3866   for (i=0; i<m; i++) {
3867     for (j=0; j<ourlens[i]; j++) {
3868       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3869       jj++;
3870     }
3871   }
3872 
3873   for (i=0; i<m; i++) {
3874     ourlens[i] -= offlens[i];
3875   }
3876   if (!sizesset) {
3877     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3878   }
3879 
3880   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3881 
3882   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3883 
3884   for (i=0; i<m; i++) {
3885     ourlens[i] += offlens[i];
3886   }
3887 
3888   if (!rank) {
3889     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3890 
3891     /* read in my part of the matrix numerical values  */
3892     nz   = procsnz[0];
3893     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3894 
3895     /* insert into matrix */
3896     jj      = rstart;
3897     smycols = mycols;
3898     svals   = vals;
3899     for (i=0; i<m; i++) {
3900       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3901       smycols += ourlens[i];
3902       svals   += ourlens[i];
3903       jj++;
3904     }
3905 
3906     /* read in other processors and ship out */
3907     for (i=1; i<size; i++) {
3908       nz   = procsnz[i];
3909       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3910       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3911     }
3912     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3913   } else {
3914     /* receive numeric values */
3915     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3916 
3917     /* receive message of values*/
3918     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3919 
3920     /* insert into matrix */
3921     jj      = rstart;
3922     smycols = mycols;
3923     svals   = vals;
3924     for (i=0; i<m; i++) {
3925       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3926       smycols += ourlens[i];
3927       svals   += ourlens[i];
3928       jj++;
3929     }
3930   }
3931   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3932   ierr = PetscFree(vals);CHKERRQ(ierr);
3933   ierr = PetscFree(mycols);CHKERRQ(ierr);
3934   ierr = PetscFree(rowners);CHKERRQ(ierr);
3935   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3942 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3943 {
3944   PetscErrorCode ierr;
3945   IS             iscol_local;
3946   PetscInt       csize;
3947 
3948   PetscFunctionBegin;
3949   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3950   if (call == MAT_REUSE_MATRIX) {
3951     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3952     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3953   } else {
3954     PetscInt cbs;
3955     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3956     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3957     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3958   }
3959   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3960   if (call == MAT_INITIAL_MATRIX) {
3961     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3962     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3963   }
3964   PetscFunctionReturn(0);
3965 }
3966 
3967 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3968 #undef __FUNCT__
3969 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3970 /*
3971     Not great since it makes two copies of the submatrix, first an SeqAIJ
3972   in local and then by concatenating the local matrices the end result.
3973   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3974 
3975   Note: This requires a sequential iscol with all indices.
3976 */
3977 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3978 {
3979   PetscErrorCode ierr;
3980   PetscMPIInt    rank,size;
3981   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3982   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3983   PetscBool      allcolumns, colflag;
3984   Mat            M,Mreuse;
3985   MatScalar      *vwork,*aa;
3986   MPI_Comm       comm;
3987   Mat_SeqAIJ     *aij;
3988 
3989   PetscFunctionBegin;
3990   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3991   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3992   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3993 
3994   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3995   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3996   if (colflag && ncol == mat->cmap->N) {
3997     allcolumns = PETSC_TRUE;
3998   } else {
3999     allcolumns = PETSC_FALSE;
4000   }
4001   if (call ==  MAT_REUSE_MATRIX) {
4002     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
4003     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
4004     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
4005   } else {
4006     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
4007   }
4008 
4009   /*
4010       m - number of local rows
4011       n - number of columns (same on all processors)
4012       rstart - first row in new global matrix generated
4013   */
4014   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
4015   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
4016   if (call == MAT_INITIAL_MATRIX) {
4017     aij = (Mat_SeqAIJ*)(Mreuse)->data;
4018     ii  = aij->i;
4019     jj  = aij->j;
4020 
4021     /*
4022         Determine the number of non-zeros in the diagonal and off-diagonal
4023         portions of the matrix in order to do correct preallocation
4024     */
4025 
4026     /* first get start and end of "diagonal" columns */
4027     if (csize == PETSC_DECIDE) {
4028       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
4029       if (mglobal == n) { /* square matrix */
4030         nlocal = m;
4031       } else {
4032         nlocal = n/size + ((n % size) > rank);
4033       }
4034     } else {
4035       nlocal = csize;
4036     }
4037     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4038     rstart = rend - nlocal;
4039     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
4040 
4041     /* next, compute all the lengths */
4042     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
4043     olens = dlens + m;
4044     for (i=0; i<m; i++) {
4045       jend = ii[i+1] - ii[i];
4046       olen = 0;
4047       dlen = 0;
4048       for (j=0; j<jend; j++) {
4049         if (*jj < rstart || *jj >= rend) olen++;
4050         else dlen++;
4051         jj++;
4052       }
4053       olens[i] = olen;
4054       dlens[i] = dlen;
4055     }
4056     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
4057     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
4058     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
4059     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
4060     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
4061     ierr = PetscFree(dlens);CHKERRQ(ierr);
4062   } else {
4063     PetscInt ml,nl;
4064 
4065     M    = *newmat;
4066     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
4067     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
4068     ierr = MatZeroEntries(M);CHKERRQ(ierr);
4069     /*
4070          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
4071        rather than the slower MatSetValues().
4072     */
4073     M->was_assembled = PETSC_TRUE;
4074     M->assembled     = PETSC_FALSE;
4075   }
4076   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
4077   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
4078   ii   = aij->i;
4079   jj   = aij->j;
4080   aa   = aij->a;
4081   for (i=0; i<m; i++) {
4082     row   = rstart + i;
4083     nz    = ii[i+1] - ii[i];
4084     cwork = jj;     jj += nz;
4085     vwork = aa;     aa += nz;
4086     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
4087   }
4088 
4089   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4090   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4091   *newmat = M;
4092 
4093   /* save submatrix used in processor for next request */
4094   if (call ==  MAT_INITIAL_MATRIX) {
4095     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
4096     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
4097   }
4098   PetscFunctionReturn(0);
4099 }
4100 
4101 #undef __FUNCT__
4102 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
4103 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4104 {
4105   PetscInt       m,cstart, cend,j,nnz,i,d;
4106   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
4107   const PetscInt *JJ;
4108   PetscScalar    *values;
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
4113 
4114   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
4115   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
4116   m      = B->rmap->n;
4117   cstart = B->cmap->rstart;
4118   cend   = B->cmap->rend;
4119   rstart = B->rmap->rstart;
4120 
4121   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
4122 
4123 #if defined(PETSC_USE_DEBUGGING)
4124   for (i=0; i<m; i++) {
4125     nnz = Ii[i+1]- Ii[i];
4126     JJ  = J + Ii[i];
4127     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4128     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
4129     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4130   }
4131 #endif
4132 
4133   for (i=0; i<m; i++) {
4134     nnz     = Ii[i+1]- Ii[i];
4135     JJ      = J + Ii[i];
4136     nnz_max = PetscMax(nnz_max,nnz);
4137     d       = 0;
4138     for (j=0; j<nnz; j++) {
4139       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4140     }
4141     d_nnz[i] = d;
4142     o_nnz[i] = nnz - d;
4143   }
4144   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4145   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4146 
4147   if (v) values = (PetscScalar*)v;
4148   else {
4149     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
4150     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
4151   }
4152 
4153   for (i=0; i<m; i++) {
4154     ii   = i + rstart;
4155     nnz  = Ii[i+1]- Ii[i];
4156     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
4157   }
4158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4160 
4161   if (!v) {
4162     ierr = PetscFree(values);CHKERRQ(ierr);
4163   }
4164   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
4170 /*@
4171    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4172    (the default parallel PETSc format).
4173 
4174    Collective on MPI_Comm
4175 
4176    Input Parameters:
4177 +  B - the matrix
4178 .  i - the indices into j for the start of each local row (starts with zero)
4179 .  j - the column indices for each local row (starts with zero)
4180 -  v - optional values in the matrix
4181 
4182    Level: developer
4183 
4184    Notes:
4185        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4186      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4187      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4188 
4189        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4190 
4191        The format which is used for the sparse matrix input, is equivalent to a
4192     row-major ordering.. i.e for the following matrix, the input data expected is
4193     as shown:
4194 
4195         1 0 0
4196         2 0 3     P0
4197        -------
4198         4 5 6     P1
4199 
4200      Process0 [P0]: rows_owned=[0,1]
4201         i =  {0,1,3}  [size = nrow+1  = 2+1]
4202         j =  {0,0,2}  [size = nz = 6]
4203         v =  {1,2,3}  [size = nz = 6]
4204 
4205      Process1 [P1]: rows_owned=[2]
4206         i =  {0,3}    [size = nrow+1  = 1+1]
4207         j =  {0,1,2}  [size = nz = 6]
4208         v =  {4,5,6}  [size = nz = 6]
4209 
4210 .keywords: matrix, aij, compressed row, sparse, parallel
4211 
4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4213           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4214 @*/
4215 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4216 {
4217   PetscErrorCode ierr;
4218 
4219   PetscFunctionBegin;
4220   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4221   PetscFunctionReturn(0);
4222 }
4223 
4224 #undef __FUNCT__
4225 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4226 /*@C
4227    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4228    (the default parallel PETSc format).  For good matrix assembly performance
4229    the user should preallocate the matrix storage by setting the parameters
4230    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4231    performance can be increased by more than a factor of 50.
4232 
4233    Collective on MPI_Comm
4234 
4235    Input Parameters:
4236 +  A - the matrix
4237 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4238            (same value is used for all local rows)
4239 .  d_nnz - array containing the number of nonzeros in the various rows of the
4240            DIAGONAL portion of the local submatrix (possibly different for each row)
4241            or NULL, if d_nz is used to specify the nonzero structure.
4242            The size of this array is equal to the number of local rows, i.e 'm'.
4243            For matrices that will be factored, you must leave room for (and set)
4244            the diagonal entry even if it is zero.
4245 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4246            submatrix (same value is used for all local rows).
4247 -  o_nnz - array containing the number of nonzeros in the various rows of the
4248            OFF-DIAGONAL portion of the local submatrix (possibly different for
4249            each row) or NULL, if o_nz is used to specify the nonzero
4250            structure. The size of this array is equal to the number
4251            of local rows, i.e 'm'.
4252 
4253    If the *_nnz parameter is given then the *_nz parameter is ignored
4254 
4255    The AIJ format (also called the Yale sparse matrix format or
4256    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4257    storage.  The stored row and column indices begin with zero.
4258    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4259 
4260    The parallel matrix is partitioned such that the first m0 rows belong to
4261    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4262    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4263 
4264    The DIAGONAL portion of the local submatrix of a processor can be defined
4265    as the submatrix which is obtained by extraction the part corresponding to
4266    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4267    first row that belongs to the processor, r2 is the last row belonging to
4268    the this processor, and c1-c2 is range of indices of the local part of a
4269    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4270    common case of a square matrix, the row and column ranges are the same and
4271    the DIAGONAL part is also square. The remaining portion of the local
4272    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4273 
4274    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4275 
4276    You can call MatGetInfo() to get information on how effective the preallocation was;
4277    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4278    You can also run with the option -info and look for messages with the string
4279    malloc in them to see if additional memory allocation was needed.
4280 
4281    Example usage:
4282 
4283    Consider the following 8x8 matrix with 34 non-zero values, that is
4284    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4285    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4286    as follows:
4287 
4288 .vb
4289             1  2  0  |  0  3  0  |  0  4
4290     Proc0   0  5  6  |  7  0  0  |  8  0
4291             9  0 10  | 11  0  0  | 12  0
4292     -------------------------------------
4293            13  0 14  | 15 16 17  |  0  0
4294     Proc1   0 18  0  | 19 20 21  |  0  0
4295             0  0  0  | 22 23  0  | 24  0
4296     -------------------------------------
4297     Proc2  25 26 27  |  0  0 28  | 29  0
4298            30  0  0  | 31 32 33  |  0 34
4299 .ve
4300 
4301    This can be represented as a collection of submatrices as:
4302 
4303 .vb
4304       A B C
4305       D E F
4306       G H I
4307 .ve
4308 
4309    Where the submatrices A,B,C are owned by proc0, D,E,F are
4310    owned by proc1, G,H,I are owned by proc2.
4311 
4312    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4313    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4314    The 'M','N' parameters are 8,8, and have the same values on all procs.
4315 
4316    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4317    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4318    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4319    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4320    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4321    matrix, ans [DF] as another SeqAIJ matrix.
4322 
4323    When d_nz, o_nz parameters are specified, d_nz storage elements are
4324    allocated for every row of the local diagonal submatrix, and o_nz
4325    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4326    One way to choose d_nz and o_nz is to use the max nonzerors per local
4327    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4328    In this case, the values of d_nz,o_nz are:
4329 .vb
4330      proc0 : dnz = 2, o_nz = 2
4331      proc1 : dnz = 3, o_nz = 2
4332      proc2 : dnz = 1, o_nz = 4
4333 .ve
4334    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4335    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4336    for proc3. i.e we are using 12+15+10=37 storage locations to store
4337    34 values.
4338 
4339    When d_nnz, o_nnz parameters are specified, the storage is specified
4340    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4341    In the above case the values for d_nnz,o_nnz are:
4342 .vb
4343      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4344      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4345      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4346 .ve
4347    Here the space allocated is sum of all the above values i.e 34, and
4348    hence pre-allocation is perfect.
4349 
4350    Level: intermediate
4351 
4352 .keywords: matrix, aij, compressed row, sparse, parallel
4353 
4354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4355           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4356 @*/
4357 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4358 {
4359   PetscErrorCode ierr;
4360 
4361   PetscFunctionBegin;
4362   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4363   PetscValidType(B,1);
4364   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4370 /*@
4371      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4372          CSR format the local rows.
4373 
4374    Collective on MPI_Comm
4375 
4376    Input Parameters:
4377 +  comm - MPI communicator
4378 .  m - number of local rows (Cannot be PETSC_DECIDE)
4379 .  n - This value should be the same as the local size used in creating the
4380        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4381        calculated if N is given) For square matrices n is almost always m.
4382 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4383 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4384 .   i - row indices
4385 .   j - column indices
4386 -   a - matrix values
4387 
4388    Output Parameter:
4389 .   mat - the matrix
4390 
4391    Level: intermediate
4392 
4393    Notes:
4394        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4395      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4396      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4397 
4398        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4399 
4400        The format which is used for the sparse matrix input, is equivalent to a
4401     row-major ordering.. i.e for the following matrix, the input data expected is
4402     as shown:
4403 
4404         1 0 0
4405         2 0 3     P0
4406        -------
4407         4 5 6     P1
4408 
4409      Process0 [P0]: rows_owned=[0,1]
4410         i =  {0,1,3}  [size = nrow+1  = 2+1]
4411         j =  {0,0,2}  [size = nz = 6]
4412         v =  {1,2,3}  [size = nz = 6]
4413 
4414      Process1 [P1]: rows_owned=[2]
4415         i =  {0,3}    [size = nrow+1  = 1+1]
4416         j =  {0,1,2}  [size = nz = 6]
4417         v =  {4,5,6}  [size = nz = 6]
4418 
4419 .keywords: matrix, aij, compressed row, sparse, parallel
4420 
4421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4422           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4423 @*/
4424 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4425 {
4426   PetscErrorCode ierr;
4427 
4428   PetscFunctionBegin;
4429   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4430   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4431   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4432   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4433   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4434   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4435   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 #undef __FUNCT__
4440 #define __FUNCT__ "MatCreateAIJ"
4441 /*@C
4442    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4443    (the default parallel PETSc format).  For good matrix assembly performance
4444    the user should preallocate the matrix storage by setting the parameters
4445    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4446    performance can be increased by more than a factor of 50.
4447 
4448    Collective on MPI_Comm
4449 
4450    Input Parameters:
4451 +  comm - MPI communicator
4452 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4453            This value should be the same as the local size used in creating the
4454            y vector for the matrix-vector product y = Ax.
4455 .  n - This value should be the same as the local size used in creating the
4456        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4457        calculated if N is given) For square matrices n is almost always m.
4458 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4459 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4460 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4461            (same value is used for all local rows)
4462 .  d_nnz - array containing the number of nonzeros in the various rows of the
4463            DIAGONAL portion of the local submatrix (possibly different for each row)
4464            or NULL, if d_nz is used to specify the nonzero structure.
4465            The size of this array is equal to the number of local rows, i.e 'm'.
4466 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4467            submatrix (same value is used for all local rows).
4468 -  o_nnz - array containing the number of nonzeros in the various rows of the
4469            OFF-DIAGONAL portion of the local submatrix (possibly different for
4470            each row) or NULL, if o_nz is used to specify the nonzero
4471            structure. The size of this array is equal to the number
4472            of local rows, i.e 'm'.
4473 
4474    Output Parameter:
4475 .  A - the matrix
4476 
4477    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4478    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4479    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4480 
4481    Notes:
4482    If the *_nnz parameter is given then the *_nz parameter is ignored
4483 
4484    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4485    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4486    storage requirements for this matrix.
4487 
4488    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4489    processor than it must be used on all processors that share the object for
4490    that argument.
4491 
4492    The user MUST specify either the local or global matrix dimensions
4493    (possibly both).
4494 
4495    The parallel matrix is partitioned across processors such that the
4496    first m0 rows belong to process 0, the next m1 rows belong to
4497    process 1, the next m2 rows belong to process 2 etc.. where
4498    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4499    values corresponding to [m x N] submatrix.
4500 
4501    The columns are logically partitioned with the n0 columns belonging
4502    to 0th partition, the next n1 columns belonging to the next
4503    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4504 
4505    The DIAGONAL portion of the local submatrix on any given processor
4506    is the submatrix corresponding to the rows and columns m,n
4507    corresponding to the given processor. i.e diagonal matrix on
4508    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4509    etc. The remaining portion of the local submatrix [m x (N-n)]
4510    constitute the OFF-DIAGONAL portion. The example below better
4511    illustrates this concept.
4512 
4513    For a square global matrix we define each processor's diagonal portion
4514    to be its local rows and the corresponding columns (a square submatrix);
4515    each processor's off-diagonal portion encompasses the remainder of the
4516    local matrix (a rectangular submatrix).
4517 
4518    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4519 
4520    When calling this routine with a single process communicator, a matrix of
4521    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4522    type of communicator, use the construction mechanism:
4523      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4524 
4525    By default, this format uses inodes (identical nodes) when possible.
4526    We search for consecutive rows with the same nonzero structure, thereby
4527    reusing matrix information to achieve increased efficiency.
4528 
4529    Options Database Keys:
4530 +  -mat_no_inode  - Do not use inodes
4531 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4532 -  -mat_aij_oneindex - Internally use indexing starting at 1
4533         rather than 0.  Note that when calling MatSetValues(),
4534         the user still MUST index entries starting at 0!
4535 
4536 
4537    Example usage:
4538 
4539    Consider the following 8x8 matrix with 34 non-zero values, that is
4540    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4541    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4542    as follows:
4543 
4544 .vb
4545             1  2  0  |  0  3  0  |  0  4
4546     Proc0   0  5  6  |  7  0  0  |  8  0
4547             9  0 10  | 11  0  0  | 12  0
4548     -------------------------------------
4549            13  0 14  | 15 16 17  |  0  0
4550     Proc1   0 18  0  | 19 20 21  |  0  0
4551             0  0  0  | 22 23  0  | 24  0
4552     -------------------------------------
4553     Proc2  25 26 27  |  0  0 28  | 29  0
4554            30  0  0  | 31 32 33  |  0 34
4555 .ve
4556 
4557    This can be represented as a collection of submatrices as:
4558 
4559 .vb
4560       A B C
4561       D E F
4562       G H I
4563 .ve
4564 
4565    Where the submatrices A,B,C are owned by proc0, D,E,F are
4566    owned by proc1, G,H,I are owned by proc2.
4567 
4568    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4569    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4570    The 'M','N' parameters are 8,8, and have the same values on all procs.
4571 
4572    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4573    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4574    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4575    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4576    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4577    matrix, ans [DF] as another SeqAIJ matrix.
4578 
4579    When d_nz, o_nz parameters are specified, d_nz storage elements are
4580    allocated for every row of the local diagonal submatrix, and o_nz
4581    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4582    One way to choose d_nz and o_nz is to use the max nonzerors per local
4583    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4584    In this case, the values of d_nz,o_nz are:
4585 .vb
4586      proc0 : dnz = 2, o_nz = 2
4587      proc1 : dnz = 3, o_nz = 2
4588      proc2 : dnz = 1, o_nz = 4
4589 .ve
4590    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4591    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4592    for proc3. i.e we are using 12+15+10=37 storage locations to store
4593    34 values.
4594 
4595    When d_nnz, o_nnz parameters are specified, the storage is specified
4596    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4597    In the above case the values for d_nnz,o_nnz are:
4598 .vb
4599      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4600      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4601      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4602 .ve
4603    Here the space allocated is sum of all the above values i.e 34, and
4604    hence pre-allocation is perfect.
4605 
4606    Level: intermediate
4607 
4608 .keywords: matrix, aij, compressed row, sparse, parallel
4609 
4610 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4611           MPIAIJ, MatCreateMPIAIJWithArrays()
4612 @*/
4613 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4614 {
4615   PetscErrorCode ierr;
4616   PetscMPIInt    size;
4617 
4618   PetscFunctionBegin;
4619   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4620   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4621   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4622   if (size > 1) {
4623     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4624     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4625   } else {
4626     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4627     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4628   }
4629   PetscFunctionReturn(0);
4630 }
4631 
4632 #undef __FUNCT__
4633 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4634 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4635 {
4636   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4637 
4638   PetscFunctionBegin;
4639   *Ad     = a->A;
4640   *Ao     = a->B;
4641   *colmap = a->garray;
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 #undef __FUNCT__
4646 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4647 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4648 {
4649   PetscErrorCode ierr;
4650   PetscInt       i;
4651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4652 
4653   PetscFunctionBegin;
4654   if (coloring->ctype == IS_COLORING_GLOBAL) {
4655     ISColoringValue *allcolors,*colors;
4656     ISColoring      ocoloring;
4657 
4658     /* set coloring for diagonal portion */
4659     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4660 
4661     /* set coloring for off-diagonal portion */
4662     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4663     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4664     for (i=0; i<a->B->cmap->n; i++) {
4665       colors[i] = allcolors[a->garray[i]];
4666     }
4667     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4668     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4669     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4670     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4671   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4672     ISColoringValue *colors;
4673     PetscInt        *larray;
4674     ISColoring      ocoloring;
4675 
4676     /* set coloring for diagonal portion */
4677     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4678     for (i=0; i<a->A->cmap->n; i++) {
4679       larray[i] = i + A->cmap->rstart;
4680     }
4681     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4682     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4683     for (i=0; i<a->A->cmap->n; i++) {
4684       colors[i] = coloring->colors[larray[i]];
4685     }
4686     ierr = PetscFree(larray);CHKERRQ(ierr);
4687     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4688     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4689     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4690 
4691     /* set coloring for off-diagonal portion */
4692     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4693     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4694     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4695     for (i=0; i<a->B->cmap->n; i++) {
4696       colors[i] = coloring->colors[larray[i]];
4697     }
4698     ierr = PetscFree(larray);CHKERRQ(ierr);
4699     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4700     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4701     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4702   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4703   PetscFunctionReturn(0);
4704 }
4705 
4706 #undef __FUNCT__
4707 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4708 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4709 {
4710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4711   PetscErrorCode ierr;
4712 
4713   PetscFunctionBegin;
4714   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4715   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4716   PetscFunctionReturn(0);
4717 }
4718 
4719 #undef __FUNCT__
4720 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4721 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4722 {
4723   PetscErrorCode ierr;
4724   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4725   PetscInt       *indx;
4726 
4727   PetscFunctionBegin;
4728   /* This routine will ONLY return MPIAIJ type matrix */
4729   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4730   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4731   if (n == PETSC_DECIDE) {
4732     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4733   }
4734   /* Check sum(n) = N */
4735   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4736   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4737 
4738   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4739   rstart -= m;
4740 
4741   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4742   for (i=0; i<m; i++) {
4743     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4744     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4745     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4746   }
4747 
4748   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4749   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4750   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4751   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4752   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4753   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4754   PetscFunctionReturn(0);
4755 }
4756 
4757 #undef __FUNCT__
4758 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4759 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4760 {
4761   PetscErrorCode ierr;
4762   PetscInt       m,N,i,rstart,nnz,Ii;
4763   PetscInt       *indx;
4764   PetscScalar    *values;
4765 
4766   PetscFunctionBegin;
4767   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4768   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4769   for (i=0; i<m; i++) {
4770     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4771     Ii   = i + rstart;
4772     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4773     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4774   }
4775   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4776   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4777   PetscFunctionReturn(0);
4778 }
4779 
4780 #undef __FUNCT__
4781 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4782 /*@
4783       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4784                  matrices from each processor
4785 
4786     Collective on MPI_Comm
4787 
4788    Input Parameters:
4789 +    comm - the communicators the parallel matrix will live on
4790 .    inmat - the input sequential matrices
4791 .    n - number of local columns (or PETSC_DECIDE)
4792 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4793 
4794    Output Parameter:
4795 .    outmat - the parallel matrix generated
4796 
4797     Level: advanced
4798 
4799    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4800 
4801 @*/
4802 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4803 {
4804   PetscErrorCode ierr;
4805 
4806   PetscFunctionBegin;
4807   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4808   if (scall == MAT_INITIAL_MATRIX) {
4809     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4810   }
4811   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4812   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4813   PetscFunctionReturn(0);
4814 }
4815 
4816 #undef __FUNCT__
4817 #define __FUNCT__ "MatFileSplit"
4818 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4819 {
4820   PetscErrorCode    ierr;
4821   PetscMPIInt       rank;
4822   PetscInt          m,N,i,rstart,nnz;
4823   size_t            len;
4824   const PetscInt    *indx;
4825   PetscViewer       out;
4826   char              *name;
4827   Mat               B;
4828   const PetscScalar *values;
4829 
4830   PetscFunctionBegin;
4831   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4832   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4833   /* Should this be the type of the diagonal block of A? */
4834   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4835   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4836   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4837   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4838   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4839   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4840   for (i=0; i<m; i++) {
4841     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4842     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4843     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4844   }
4845   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4846   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4847 
4848   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4849   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4850   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4851   sprintf(name,"%s.%d",outfile,rank);
4852   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4853   ierr = PetscFree(name);CHKERRQ(ierr);
4854   ierr = MatView(B,out);CHKERRQ(ierr);
4855   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4856   ierr = MatDestroy(&B);CHKERRQ(ierr);
4857   PetscFunctionReturn(0);
4858 }
4859 
4860 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4861 #undef __FUNCT__
4862 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4863 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4864 {
4865   PetscErrorCode      ierr;
4866   Mat_Merge_SeqsToMPI *merge;
4867   PetscContainer      container;
4868 
4869   PetscFunctionBegin;
4870   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4871   if (container) {
4872     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4873     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4874     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4875     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4876     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4877     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4878     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4879     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4880     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4881     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4882     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4883     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4884     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4885     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4886     ierr = PetscFree(merge);CHKERRQ(ierr);
4887     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4888   }
4889   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4890   PetscFunctionReturn(0);
4891 }
4892 
4893 #include <../src/mat/utils/freespace.h>
4894 #include <petscbt.h>
4895 
4896 #undef __FUNCT__
4897 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4898 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4899 {
4900   PetscErrorCode      ierr;
4901   MPI_Comm            comm;
4902   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4903   PetscMPIInt         size,rank,taga,*len_s;
4904   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4905   PetscInt            proc,m;
4906   PetscInt            **buf_ri,**buf_rj;
4907   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4908   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4909   MPI_Request         *s_waits,*r_waits;
4910   MPI_Status          *status;
4911   MatScalar           *aa=a->a;
4912   MatScalar           **abuf_r,*ba_i;
4913   Mat_Merge_SeqsToMPI *merge;
4914   PetscContainer      container;
4915 
4916   PetscFunctionBegin;
4917   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4918   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4919 
4920   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4921   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4922 
4923   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4924   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4925 
4926   bi     = merge->bi;
4927   bj     = merge->bj;
4928   buf_ri = merge->buf_ri;
4929   buf_rj = merge->buf_rj;
4930 
4931   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4932   owners = merge->rowmap->range;
4933   len_s  = merge->len_s;
4934 
4935   /* send and recv matrix values */
4936   /*-----------------------------*/
4937   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4938   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4939 
4940   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4941   for (proc=0,k=0; proc<size; proc++) {
4942     if (!len_s[proc]) continue;
4943     i    = owners[proc];
4944     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4945     k++;
4946   }
4947 
4948   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4949   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4950   ierr = PetscFree(status);CHKERRQ(ierr);
4951 
4952   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4953   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4954 
4955   /* insert mat values of mpimat */
4956   /*----------------------------*/
4957   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4958   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4959 
4960   for (k=0; k<merge->nrecv; k++) {
4961     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4962     nrows       = *(buf_ri_k[k]);
4963     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4964     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4965   }
4966 
4967   /* set values of ba */
4968   m = merge->rowmap->n;
4969   for (i=0; i<m; i++) {
4970     arow = owners[rank] + i;
4971     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4972     bnzi = bi[i+1] - bi[i];
4973     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4974 
4975     /* add local non-zero vals of this proc's seqmat into ba */
4976     anzi   = ai[arow+1] - ai[arow];
4977     aj     = a->j + ai[arow];
4978     aa     = a->a + ai[arow];
4979     nextaj = 0;
4980     for (j=0; nextaj<anzi; j++) {
4981       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4982         ba_i[j] += aa[nextaj++];
4983       }
4984     }
4985 
4986     /* add received vals into ba */
4987     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4988       /* i-th row */
4989       if (i == *nextrow[k]) {
4990         anzi   = *(nextai[k]+1) - *nextai[k];
4991         aj     = buf_rj[k] + *(nextai[k]);
4992         aa     = abuf_r[k] + *(nextai[k]);
4993         nextaj = 0;
4994         for (j=0; nextaj<anzi; j++) {
4995           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4996             ba_i[j] += aa[nextaj++];
4997           }
4998         }
4999         nextrow[k]++; nextai[k]++;
5000       }
5001     }
5002     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
5003   }
5004   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5005   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5006 
5007   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
5008   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
5009   ierr = PetscFree(ba_i);CHKERRQ(ierr);
5010   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5011   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
5019 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
5020 {
5021   PetscErrorCode      ierr;
5022   Mat                 B_mpi;
5023   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
5024   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
5025   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
5026   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
5027   PetscInt            len,proc,*dnz,*onz,bs,cbs;
5028   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
5029   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
5030   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
5031   MPI_Status          *status;
5032   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
5033   PetscBT             lnkbt;
5034   Mat_Merge_SeqsToMPI *merge;
5035   PetscContainer      container;
5036 
5037   PetscFunctionBegin;
5038   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5039 
5040   /* make sure it is a PETSc comm */
5041   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
5042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5044 
5045   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
5046   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
5047 
5048   /* determine row ownership */
5049   /*---------------------------------------------------------*/
5050   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
5051   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
5052   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
5053   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
5054   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
5055   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
5056   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
5057 
5058   m      = merge->rowmap->n;
5059   owners = merge->rowmap->range;
5060 
5061   /* determine the number of messages to send, their lengths */
5062   /*---------------------------------------------------------*/
5063   len_s = merge->len_s;
5064 
5065   len          = 0; /* length of buf_si[] */
5066   merge->nsend = 0;
5067   for (proc=0; proc<size; proc++) {
5068     len_si[proc] = 0;
5069     if (proc == rank) {
5070       len_s[proc] = 0;
5071     } else {
5072       len_si[proc] = owners[proc+1] - owners[proc] + 1;
5073       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
5074     }
5075     if (len_s[proc]) {
5076       merge->nsend++;
5077       nrows = 0;
5078       for (i=owners[proc]; i<owners[proc+1]; i++) {
5079         if (ai[i+1] > ai[i]) nrows++;
5080       }
5081       len_si[proc] = 2*(nrows+1);
5082       len         += len_si[proc];
5083     }
5084   }
5085 
5086   /* determine the number and length of messages to receive for ij-structure */
5087   /*-------------------------------------------------------------------------*/
5088   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
5089   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
5090 
5091   /* post the Irecv of j-structure */
5092   /*-------------------------------*/
5093   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
5094   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
5095 
5096   /* post the Isend of j-structure */
5097   /*--------------------------------*/
5098   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
5099 
5100   for (proc=0, k=0; proc<size; proc++) {
5101     if (!len_s[proc]) continue;
5102     i    = owners[proc];
5103     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
5104     k++;
5105   }
5106 
5107   /* receives and sends of j-structure are complete */
5108   /*------------------------------------------------*/
5109   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
5110   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
5111 
5112   /* send and recv i-structure */
5113   /*---------------------------*/
5114   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
5115   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
5116 
5117   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
5118   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
5119   for (proc=0,k=0; proc<size; proc++) {
5120     if (!len_s[proc]) continue;
5121     /* form outgoing message for i-structure:
5122          buf_si[0]:                 nrows to be sent
5123                [1:nrows]:           row index (global)
5124                [nrows+1:2*nrows+1]: i-structure index
5125     */
5126     /*-------------------------------------------*/
5127     nrows       = len_si[proc]/2 - 1;
5128     buf_si_i    = buf_si + nrows+1;
5129     buf_si[0]   = nrows;
5130     buf_si_i[0] = 0;
5131     nrows       = 0;
5132     for (i=owners[proc]; i<owners[proc+1]; i++) {
5133       anzi = ai[i+1] - ai[i];
5134       if (anzi) {
5135         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
5136         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
5137         nrows++;
5138       }
5139     }
5140     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
5141     k++;
5142     buf_si += len_si[proc];
5143   }
5144 
5145   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
5146   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
5147 
5148   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
5149   for (i=0; i<merge->nrecv; i++) {
5150     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
5151   }
5152 
5153   ierr = PetscFree(len_si);CHKERRQ(ierr);
5154   ierr = PetscFree(len_ri);CHKERRQ(ierr);
5155   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
5156   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
5157   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
5158   ierr = PetscFree(buf_s);CHKERRQ(ierr);
5159   ierr = PetscFree(status);CHKERRQ(ierr);
5160 
5161   /* compute a local seq matrix in each processor */
5162   /*----------------------------------------------*/
5163   /* allocate bi array and free space for accumulating nonzero column info */
5164   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
5165   bi[0] = 0;
5166 
5167   /* create and initialize a linked list */
5168   nlnk = N+1;
5169   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5170 
5171   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5172   len  = ai[owners[rank+1]] - ai[owners[rank]];
5173   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5174 
5175   current_space = free_space;
5176 
5177   /* determine symbolic info for each local row */
5178   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5179 
5180   for (k=0; k<merge->nrecv; k++) {
5181     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5182     nrows       = *buf_ri_k[k];
5183     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5184     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5185   }
5186 
5187   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5188   len  = 0;
5189   for (i=0; i<m; i++) {
5190     bnzi = 0;
5191     /* add local non-zero cols of this proc's seqmat into lnk */
5192     arow  = owners[rank] + i;
5193     anzi  = ai[arow+1] - ai[arow];
5194     aj    = a->j + ai[arow];
5195     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5196     bnzi += nlnk;
5197     /* add received col data into lnk */
5198     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5199       if (i == *nextrow[k]) { /* i-th row */
5200         anzi  = *(nextai[k]+1) - *nextai[k];
5201         aj    = buf_rj[k] + *nextai[k];
5202         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5203         bnzi += nlnk;
5204         nextrow[k]++; nextai[k]++;
5205       }
5206     }
5207     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5208 
5209     /* if free space is not available, make more free space */
5210     if (current_space->local_remaining<bnzi) {
5211       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5212       nspacedouble++;
5213     }
5214     /* copy data into free space, then initialize lnk */
5215     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5216     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5217 
5218     current_space->array           += bnzi;
5219     current_space->local_used      += bnzi;
5220     current_space->local_remaining -= bnzi;
5221 
5222     bi[i+1] = bi[i] + bnzi;
5223   }
5224 
5225   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5226 
5227   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5228   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5229   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5230 
5231   /* create symbolic parallel matrix B_mpi */
5232   /*---------------------------------------*/
5233   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5234   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5235   if (n==PETSC_DECIDE) {
5236     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5237   } else {
5238     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5239   }
5240   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5241   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5242   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5243   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5244   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5245 
5246   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5247   B_mpi->assembled    = PETSC_FALSE;
5248   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5249   merge->bi           = bi;
5250   merge->bj           = bj;
5251   merge->buf_ri       = buf_ri;
5252   merge->buf_rj       = buf_rj;
5253   merge->coi          = NULL;
5254   merge->coj          = NULL;
5255   merge->owners_co    = NULL;
5256 
5257   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5258 
5259   /* attach the supporting struct to B_mpi for reuse */
5260   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5261   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5262   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5263   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5264   *mpimat = B_mpi;
5265 
5266   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5267   PetscFunctionReturn(0);
5268 }
5269 
5270 #undef __FUNCT__
5271 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5272 /*@C
5273       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5274                  matrices from each processor
5275 
5276     Collective on MPI_Comm
5277 
5278    Input Parameters:
5279 +    comm - the communicators the parallel matrix will live on
5280 .    seqmat - the input sequential matrices
5281 .    m - number of local rows (or PETSC_DECIDE)
5282 .    n - number of local columns (or PETSC_DECIDE)
5283 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5284 
5285    Output Parameter:
5286 .    mpimat - the parallel matrix generated
5287 
5288     Level: advanced
5289 
5290    Notes:
5291      The dimensions of the sequential matrix in each processor MUST be the same.
5292      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5293      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5294 @*/
5295 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5296 {
5297   PetscErrorCode ierr;
5298   PetscMPIInt    size;
5299 
5300   PetscFunctionBegin;
5301   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5302   if (size == 1) {
5303     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5304     if (scall == MAT_INITIAL_MATRIX) {
5305       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5306     } else {
5307       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5308     }
5309     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5310     PetscFunctionReturn(0);
5311   }
5312   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5313   if (scall == MAT_INITIAL_MATRIX) {
5314     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5315   }
5316   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5317   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5318   PetscFunctionReturn(0);
5319 }
5320 
5321 #undef __FUNCT__
5322 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5323 /*@
5324      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5325           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5326           with MatGetSize()
5327 
5328     Not Collective
5329 
5330    Input Parameters:
5331 +    A - the matrix
5332 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5333 
5334    Output Parameter:
5335 .    A_loc - the local sequential matrix generated
5336 
5337     Level: developer
5338 
5339 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5340 
5341 @*/
5342 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5343 {
5344   PetscErrorCode ierr;
5345   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5346   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5347   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5348   MatScalar      *aa=a->a,*ba=b->a,*cam;
5349   PetscScalar    *ca;
5350   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5351   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5352   PetscBool      match;
5353 
5354   PetscFunctionBegin;
5355   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5356   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5357   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5358   if (scall == MAT_INITIAL_MATRIX) {
5359     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5360     ci[0] = 0;
5361     for (i=0; i<am; i++) {
5362       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5363     }
5364     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5365     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5366     k    = 0;
5367     for (i=0; i<am; i++) {
5368       ncols_o = bi[i+1] - bi[i];
5369       ncols_d = ai[i+1] - ai[i];
5370       /* off-diagonal portion of A */
5371       for (jo=0; jo<ncols_o; jo++) {
5372         col = cmap[*bj];
5373         if (col >= cstart) break;
5374         cj[k]   = col; bj++;
5375         ca[k++] = *ba++;
5376       }
5377       /* diagonal portion of A */
5378       for (j=0; j<ncols_d; j++) {
5379         cj[k]   = cstart + *aj++;
5380         ca[k++] = *aa++;
5381       }
5382       /* off-diagonal portion of A */
5383       for (j=jo; j<ncols_o; j++) {
5384         cj[k]   = cmap[*bj++];
5385         ca[k++] = *ba++;
5386       }
5387     }
5388     /* put together the new matrix */
5389     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5390     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5391     /* Since these are PETSc arrays, change flags to free them as necessary. */
5392     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5393     mat->free_a  = PETSC_TRUE;
5394     mat->free_ij = PETSC_TRUE;
5395     mat->nonew   = 0;
5396   } else if (scall == MAT_REUSE_MATRIX) {
5397     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5398     ci = mat->i; cj = mat->j; cam = mat->a;
5399     for (i=0; i<am; i++) {
5400       /* off-diagonal portion of A */
5401       ncols_o = bi[i+1] - bi[i];
5402       for (jo=0; jo<ncols_o; jo++) {
5403         col = cmap[*bj];
5404         if (col >= cstart) break;
5405         *cam++ = *ba++; bj++;
5406       }
5407       /* diagonal portion of A */
5408       ncols_d = ai[i+1] - ai[i];
5409       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5410       /* off-diagonal portion of A */
5411       for (j=jo; j<ncols_o; j++) {
5412         *cam++ = *ba++; bj++;
5413       }
5414     }
5415   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5416   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5417   PetscFunctionReturn(0);
5418 }
5419 
5420 #undef __FUNCT__
5421 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5422 /*@C
5423      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5424 
5425     Not Collective
5426 
5427    Input Parameters:
5428 +    A - the matrix
5429 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5430 -    row, col - index sets of rows and columns to extract (or NULL)
5431 
5432    Output Parameter:
5433 .    A_loc - the local sequential matrix generated
5434 
5435     Level: developer
5436 
5437 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5438 
5439 @*/
5440 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5441 {
5442   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5443   PetscErrorCode ierr;
5444   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5445   IS             isrowa,iscola;
5446   Mat            *aloc;
5447   PetscBool      match;
5448 
5449   PetscFunctionBegin;
5450   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5451   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5452   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5453   if (!row) {
5454     start = A->rmap->rstart; end = A->rmap->rend;
5455     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5456   } else {
5457     isrowa = *row;
5458   }
5459   if (!col) {
5460     start = A->cmap->rstart;
5461     cmap  = a->garray;
5462     nzA   = a->A->cmap->n;
5463     nzB   = a->B->cmap->n;
5464     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5465     ncols = 0;
5466     for (i=0; i<nzB; i++) {
5467       if (cmap[i] < start) idx[ncols++] = cmap[i];
5468       else break;
5469     }
5470     imark = i;
5471     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5472     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5473     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5474   } else {
5475     iscola = *col;
5476   }
5477   if (scall != MAT_INITIAL_MATRIX) {
5478     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5479     aloc[0] = *A_loc;
5480   }
5481   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5482   *A_loc = aloc[0];
5483   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5484   if (!row) {
5485     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5486   }
5487   if (!col) {
5488     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5489   }
5490   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5491   PetscFunctionReturn(0);
5492 }
5493 
5494 #undef __FUNCT__
5495 #define __FUNCT__ "MatGetBrowsOfAcols"
5496 /*@C
5497     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5498 
5499     Collective on Mat
5500 
5501    Input Parameters:
5502 +    A,B - the matrices in mpiaij format
5503 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5504 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5505 
5506    Output Parameter:
5507 +    rowb, colb - index sets of rows and columns of B to extract
5508 -    B_seq - the sequential matrix generated
5509 
5510     Level: developer
5511 
5512 @*/
5513 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5514 {
5515   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5516   PetscErrorCode ierr;
5517   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5518   IS             isrowb,iscolb;
5519   Mat            *bseq=NULL;
5520 
5521   PetscFunctionBegin;
5522   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5523     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5524   }
5525   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5526 
5527   if (scall == MAT_INITIAL_MATRIX) {
5528     start = A->cmap->rstart;
5529     cmap  = a->garray;
5530     nzA   = a->A->cmap->n;
5531     nzB   = a->B->cmap->n;
5532     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5533     ncols = 0;
5534     for (i=0; i<nzB; i++) {  /* row < local row index */
5535       if (cmap[i] < start) idx[ncols++] = cmap[i];
5536       else break;
5537     }
5538     imark = i;
5539     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5540     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5541     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5542     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5543   } else {
5544     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5545     isrowb  = *rowb; iscolb = *colb;
5546     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5547     bseq[0] = *B_seq;
5548   }
5549   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5550   *B_seq = bseq[0];
5551   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5552   if (!rowb) {
5553     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5554   } else {
5555     *rowb = isrowb;
5556   }
5557   if (!colb) {
5558     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5559   } else {
5560     *colb = iscolb;
5561   }
5562   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5563   PetscFunctionReturn(0);
5564 }
5565 
5566 #undef __FUNCT__
5567 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5568 /*
5569     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5570     of the OFF-DIAGONAL portion of local A
5571 
5572     Collective on Mat
5573 
5574    Input Parameters:
5575 +    A,B - the matrices in mpiaij format
5576 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5577 
5578    Output Parameter:
5579 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5580 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5581 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5582 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5583 
5584     Level: developer
5585 
5586 */
5587 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5588 {
5589   VecScatter_MPI_General *gen_to,*gen_from;
5590   PetscErrorCode         ierr;
5591   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5592   Mat_SeqAIJ             *b_oth;
5593   VecScatter             ctx =a->Mvctx;
5594   MPI_Comm               comm;
5595   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5596   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5597   PetscScalar            *rvalues,*svalues;
5598   MatScalar              *b_otha,*bufa,*bufA;
5599   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5600   MPI_Request            *rwaits = NULL,*swaits = NULL;
5601   MPI_Status             *sstatus,rstatus;
5602   PetscMPIInt            jj;
5603   PetscInt               *cols,sbs,rbs;
5604   PetscScalar            *vals;
5605 
5606   PetscFunctionBegin;
5607   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5608   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5609     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5610   }
5611   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5612   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5613 
5614   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5615   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5616   rvalues  = gen_from->values; /* holds the length of receiving row */
5617   svalues  = gen_to->values;   /* holds the length of sending row */
5618   nrecvs   = gen_from->n;
5619   nsends   = gen_to->n;
5620 
5621   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5622   srow    = gen_to->indices;    /* local row index to be sent */
5623   sstarts = gen_to->starts;
5624   sprocs  = gen_to->procs;
5625   sstatus = gen_to->sstatus;
5626   sbs     = gen_to->bs;
5627   rstarts = gen_from->starts;
5628   rprocs  = gen_from->procs;
5629   rbs     = gen_from->bs;
5630 
5631   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5632   if (scall == MAT_INITIAL_MATRIX) {
5633     /* i-array */
5634     /*---------*/
5635     /*  post receives */
5636     for (i=0; i<nrecvs; i++) {
5637       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5638       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5639       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5640     }
5641 
5642     /* pack the outgoing message */
5643     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5644 
5645     sstartsj[0] = 0;
5646     rstartsj[0] = 0;
5647     len         = 0; /* total length of j or a array to be sent */
5648     k           = 0;
5649     for (i=0; i<nsends; i++) {
5650       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5651       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5652       for (j=0; j<nrows; j++) {
5653         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5654         for (l=0; l<sbs; l++) {
5655           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5656 
5657           rowlen[j*sbs+l] = ncols;
5658 
5659           len += ncols;
5660           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5661         }
5662         k++;
5663       }
5664       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5665 
5666       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5667     }
5668     /* recvs and sends of i-array are completed */
5669     i = nrecvs;
5670     while (i--) {
5671       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5672     }
5673     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5674 
5675     /* allocate buffers for sending j and a arrays */
5676     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5677     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5678 
5679     /* create i-array of B_oth */
5680     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5681 
5682     b_othi[0] = 0;
5683     len       = 0; /* total length of j or a array to be received */
5684     k         = 0;
5685     for (i=0; i<nrecvs; i++) {
5686       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5687       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5688       for (j=0; j<nrows; j++) {
5689         b_othi[k+1] = b_othi[k] + rowlen[j];
5690         len        += rowlen[j]; k++;
5691       }
5692       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5693     }
5694 
5695     /* allocate space for j and a arrrays of B_oth */
5696     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5697     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5698 
5699     /* j-array */
5700     /*---------*/
5701     /*  post receives of j-array */
5702     for (i=0; i<nrecvs; i++) {
5703       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5704       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5705     }
5706 
5707     /* pack the outgoing message j-array */
5708     k = 0;
5709     for (i=0; i<nsends; i++) {
5710       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5711       bufJ  = bufj+sstartsj[i];
5712       for (j=0; j<nrows; j++) {
5713         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5714         for (ll=0; ll<sbs; ll++) {
5715           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5716           for (l=0; l<ncols; l++) {
5717             *bufJ++ = cols[l];
5718           }
5719           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5720         }
5721       }
5722       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5723     }
5724 
5725     /* recvs and sends of j-array are completed */
5726     i = nrecvs;
5727     while (i--) {
5728       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5729     }
5730     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5731   } else if (scall == MAT_REUSE_MATRIX) {
5732     sstartsj = *startsj_s;
5733     rstartsj = *startsj_r;
5734     bufa     = *bufa_ptr;
5735     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5736     b_otha   = b_oth->a;
5737   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5738 
5739   /* a-array */
5740   /*---------*/
5741   /*  post receives of a-array */
5742   for (i=0; i<nrecvs; i++) {
5743     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5744     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5745   }
5746 
5747   /* pack the outgoing message a-array */
5748   k = 0;
5749   for (i=0; i<nsends; i++) {
5750     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5751     bufA  = bufa+sstartsj[i];
5752     for (j=0; j<nrows; j++) {
5753       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5754       for (ll=0; ll<sbs; ll++) {
5755         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5756         for (l=0; l<ncols; l++) {
5757           *bufA++ = vals[l];
5758         }
5759         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5760       }
5761     }
5762     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5763   }
5764   /* recvs and sends of a-array are completed */
5765   i = nrecvs;
5766   while (i--) {
5767     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5768   }
5769   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5770   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5771 
5772   if (scall == MAT_INITIAL_MATRIX) {
5773     /* put together the new matrix */
5774     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5775 
5776     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5777     /* Since these are PETSc arrays, change flags to free them as necessary. */
5778     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5779     b_oth->free_a  = PETSC_TRUE;
5780     b_oth->free_ij = PETSC_TRUE;
5781     b_oth->nonew   = 0;
5782 
5783     ierr = PetscFree(bufj);CHKERRQ(ierr);
5784     if (!startsj_s || !bufa_ptr) {
5785       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5786       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5787     } else {
5788       *startsj_s = sstartsj;
5789       *startsj_r = rstartsj;
5790       *bufa_ptr  = bufa;
5791     }
5792   }
5793   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5794   PetscFunctionReturn(0);
5795 }
5796 
5797 #undef __FUNCT__
5798 #define __FUNCT__ "MatGetCommunicationStructs"
5799 /*@C
5800   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5801 
5802   Not Collective
5803 
5804   Input Parameters:
5805 . A - The matrix in mpiaij format
5806 
5807   Output Parameter:
5808 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5809 . colmap - A map from global column index to local index into lvec
5810 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5811 
5812   Level: developer
5813 
5814 @*/
5815 #if defined(PETSC_USE_CTABLE)
5816 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5817 #else
5818 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5819 #endif
5820 {
5821   Mat_MPIAIJ *a;
5822 
5823   PetscFunctionBegin;
5824   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5825   PetscValidPointer(lvec, 2);
5826   PetscValidPointer(colmap, 3);
5827   PetscValidPointer(multScatter, 4);
5828   a = (Mat_MPIAIJ*) A->data;
5829   if (lvec) *lvec = a->lvec;
5830   if (colmap) *colmap = a->colmap;
5831   if (multScatter) *multScatter = a->Mvctx;
5832   PetscFunctionReturn(0);
5833 }
5834 
5835 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5836 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5837 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5838 
5839 #undef __FUNCT__
5840 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5841 /*
5842     Computes (B'*A')' since computing B*A directly is untenable
5843 
5844                n                       p                          p
5845         (              )       (              )         (                  )
5846       m (      A       )  *  n (       B      )   =   m (         C        )
5847         (              )       (              )         (                  )
5848 
5849 */
5850 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5851 {
5852   PetscErrorCode ierr;
5853   Mat            At,Bt,Ct;
5854 
5855   PetscFunctionBegin;
5856   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5857   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5858   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5859   ierr = MatDestroy(&At);CHKERRQ(ierr);
5860   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5861   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5862   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5863   PetscFunctionReturn(0);
5864 }
5865 
5866 #undef __FUNCT__
5867 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5868 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5869 {
5870   PetscErrorCode ierr;
5871   PetscInt       m=A->rmap->n,n=B->cmap->n;
5872   Mat            Cmat;
5873 
5874   PetscFunctionBegin;
5875   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5876   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5877   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5878   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5879   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5880   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5881   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5882   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5883 
5884   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5885 
5886   *C = Cmat;
5887   PetscFunctionReturn(0);
5888 }
5889 
5890 /* ----------------------------------------------------------------*/
5891 #undef __FUNCT__
5892 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5893 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5894 {
5895   PetscErrorCode ierr;
5896 
5897   PetscFunctionBegin;
5898   if (scall == MAT_INITIAL_MATRIX) {
5899     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5900     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5901     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5902   }
5903   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5904   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5905   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5906   PetscFunctionReturn(0);
5907 }
5908 
5909 #if defined(PETSC_HAVE_MUMPS)
5910 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5911 #endif
5912 #if defined(PETSC_HAVE_PASTIX)
5913 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5914 #endif
5915 #if defined(PETSC_HAVE_SUPERLU_DIST)
5916 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5917 #endif
5918 #if defined(PETSC_HAVE_CLIQUE)
5919 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5920 #endif
5921 
5922 /*MC
5923    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5924 
5925    Options Database Keys:
5926 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5927 
5928   Level: beginner
5929 
5930 .seealso: MatCreateAIJ()
5931 M*/
5932 
5933 #undef __FUNCT__
5934 #define __FUNCT__ "MatCreate_MPIAIJ"
5935 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5936 {
5937   Mat_MPIAIJ     *b;
5938   PetscErrorCode ierr;
5939   PetscMPIInt    size;
5940 
5941   PetscFunctionBegin;
5942   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5943 
5944   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5945   B->data       = (void*)b;
5946   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5947   B->assembled  = PETSC_FALSE;
5948   B->insertmode = NOT_SET_VALUES;
5949   b->size       = size;
5950 
5951   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5952 
5953   /* build cache for off array entries formed */
5954   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5955 
5956   b->donotstash  = PETSC_FALSE;
5957   b->colmap      = 0;
5958   b->garray      = 0;
5959   b->roworiented = PETSC_TRUE;
5960 
5961   /* stuff used for matrix vector multiply */
5962   b->lvec  = NULL;
5963   b->Mvctx = NULL;
5964 
5965   /* stuff for MatGetRow() */
5966   b->rowindices   = 0;
5967   b->rowvalues    = 0;
5968   b->getrowactive = PETSC_FALSE;
5969 
5970   /* flexible pointer used in CUSP/CUSPARSE classes */
5971   b->spptr = NULL;
5972 
5973 #if defined(PETSC_HAVE_MUMPS)
5974   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5975 #endif
5976 #if defined(PETSC_HAVE_PASTIX)
5977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5978 #endif
5979 #if defined(PETSC_HAVE_SUPERLU_DIST)
5980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5981 #endif
5982 #if defined(PETSC_HAVE_CLIQUE)
5983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5984 #endif
5985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5989   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5992   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5993   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5994   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5995   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5996   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5998   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5999   PetscFunctionReturn(0);
6000 }
6001 
6002 #undef __FUNCT__
6003 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
6004 /*@
6005      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6006          and "off-diagonal" part of the matrix in CSR format.
6007 
6008    Collective on MPI_Comm
6009 
6010    Input Parameters:
6011 +  comm - MPI communicator
6012 .  m - number of local rows (Cannot be PETSC_DECIDE)
6013 .  n - This value should be the same as the local size used in creating the
6014        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6015        calculated if N is given) For square matrices n is almost always m.
6016 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6017 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6018 .   i - row indices for "diagonal" portion of matrix
6019 .   j - column indices
6020 .   a - matrix values
6021 .   oi - row indices for "off-diagonal" portion of matrix
6022 .   oj - column indices
6023 -   oa - matrix values
6024 
6025    Output Parameter:
6026 .   mat - the matrix
6027 
6028    Level: advanced
6029 
6030    Notes:
6031        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6032        must free the arrays once the matrix has been destroyed and not before.
6033 
6034        The i and j indices are 0 based
6035 
6036        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6037 
6038        This sets local rows and cannot be used to set off-processor values.
6039 
6040        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6041        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6042        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6043        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6044        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6045        communication if it is known that only local entries will be set.
6046 
6047 .keywords: matrix, aij, compressed row, sparse, parallel
6048 
6049 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6050           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6051 @*/
6052 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6053 {
6054   PetscErrorCode ierr;
6055   Mat_MPIAIJ     *maij;
6056 
6057   PetscFunctionBegin;
6058   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6059   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6060   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6061   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6062   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6063   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6064   maij = (Mat_MPIAIJ*) (*mat)->data;
6065 
6066   (*mat)->preallocated = PETSC_TRUE;
6067 
6068   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6069   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6070 
6071   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6072   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6073 
6074   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6075   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6076   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6077   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6078 
6079   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6080   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6081   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6082   PetscFunctionReturn(0);
6083 }
6084 
6085 /*
6086     Special version for direct calls from Fortran
6087 */
6088 #include <petsc-private/fortranimpl.h>
6089 
6090 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6091 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6092 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6093 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6094 #endif
6095 
6096 /* Change these macros so can be used in void function */
6097 #undef CHKERRQ
6098 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6099 #undef SETERRQ2
6100 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6101 #undef SETERRQ3
6102 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6103 #undef SETERRQ
6104 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6105 
6106 #undef __FUNCT__
6107 #define __FUNCT__ "matsetvaluesmpiaij_"
6108 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6109 {
6110   Mat            mat  = *mmat;
6111   PetscInt       m    = *mm, n = *mn;
6112   InsertMode     addv = *maddv;
6113   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6114   PetscScalar    value;
6115   PetscErrorCode ierr;
6116 
6117   MatCheckPreallocated(mat,1);
6118   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6119 
6120 #if defined(PETSC_USE_DEBUG)
6121   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6122 #endif
6123   {
6124     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6125     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6126     PetscBool roworiented = aij->roworiented;
6127 
6128     /* Some Variables required in the macro */
6129     Mat        A                 = aij->A;
6130     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6131     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6132     MatScalar  *aa               = a->a;
6133     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6134     Mat        B                 = aij->B;
6135     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6136     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6137     MatScalar  *ba               = b->a;
6138 
6139     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6140     PetscInt  nonew = a->nonew;
6141     MatScalar *ap1,*ap2;
6142 
6143     PetscFunctionBegin;
6144     for (i=0; i<m; i++) {
6145       if (im[i] < 0) continue;
6146 #if defined(PETSC_USE_DEBUG)
6147       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6148 #endif
6149       if (im[i] >= rstart && im[i] < rend) {
6150         row      = im[i] - rstart;
6151         lastcol1 = -1;
6152         rp1      = aj + ai[row];
6153         ap1      = aa + ai[row];
6154         rmax1    = aimax[row];
6155         nrow1    = ailen[row];
6156         low1     = 0;
6157         high1    = nrow1;
6158         lastcol2 = -1;
6159         rp2      = bj + bi[row];
6160         ap2      = ba + bi[row];
6161         rmax2    = bimax[row];
6162         nrow2    = bilen[row];
6163         low2     = 0;
6164         high2    = nrow2;
6165 
6166         for (j=0; j<n; j++) {
6167           if (roworiented) value = v[i*n+j];
6168           else value = v[i+j*m];
6169           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6170           if (in[j] >= cstart && in[j] < cend) {
6171             col = in[j] - cstart;
6172             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6173           } else if (in[j] < 0) continue;
6174 #if defined(PETSC_USE_DEBUG)
6175           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6176 #endif
6177           else {
6178             if (mat->was_assembled) {
6179               if (!aij->colmap) {
6180                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6181               }
6182 #if defined(PETSC_USE_CTABLE)
6183               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6184               col--;
6185 #else
6186               col = aij->colmap[in[j]] - 1;
6187 #endif
6188               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6189                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6190                 col  =  in[j];
6191                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6192                 B     = aij->B;
6193                 b     = (Mat_SeqAIJ*)B->data;
6194                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6195                 rp2   = bj + bi[row];
6196                 ap2   = ba + bi[row];
6197                 rmax2 = bimax[row];
6198                 nrow2 = bilen[row];
6199                 low2  = 0;
6200                 high2 = nrow2;
6201                 bm    = aij->B->rmap->n;
6202                 ba    = b->a;
6203               }
6204             } else col = in[j];
6205             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6206           }
6207         }
6208       } else if (!aij->donotstash) {
6209         if (roworiented) {
6210           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6211         } else {
6212           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6213         }
6214       }
6215     }
6216   }
6217   PetscFunctionReturnVoid();
6218 }
6219 
6220