xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5cc03489524fe93acaaea09abcf103fc3e31a8ab)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537     ierr = redund->Destroy(A);CHKERRQ(ierr);
2538     ierr = PetscFree(redund);CHKERRQ(ierr);
2539   }
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2545 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2546 {
2547   PetscMPIInt    rank,size;
2548   MPI_Comm       comm,subcomm=psubcomm->comm;
2549   PetscErrorCode ierr;
2550   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2551   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2552   PetscInt       *rowrange = mat->rmap->range;
2553   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2554   Mat            A = aij->A,B=aij->B,C=*matredundant;
2555   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2556   PetscScalar    *sbuf_a;
2557   PetscInt       nzlocal=a->nz+b->nz;
2558   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2559   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2560   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2561   MatScalar      *aworkA,*aworkB;
2562   PetscScalar    *vals;
2563   PetscMPIInt    tag1,tag2,tag3,imdex;
2564   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2565   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2566   MPI_Status     recv_status,*send_status;
2567   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2568   PetscInt       **rbuf_j=NULL;
2569   PetscScalar    **rbuf_a=NULL;
2570   Mat_Redundant  *redund =NULL;
2571   PetscInt       rstart_sub,rend_sub,mloc_sub;
2572 
2573   PetscFunctionBegin;
2574   if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type");
2575   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
2576   if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N);
2577 
2578   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2579   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2581   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2582   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2583 
2584   if (reuse == MAT_REUSE_MATRIX) {
2585     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2586     if (subsize == 1) {
2587       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2588       redund = c->redundant;
2589     } else {
2590       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2591       redund = c->redundant;
2592     }
2593     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2594 
2595     nsends    = redund->nsends;
2596     nrecvs    = redund->nrecvs;
2597     send_rank = redund->send_rank;
2598     recv_rank = redund->recv_rank;
2599     sbuf_nz   = redund->sbuf_nz;
2600     rbuf_nz   = redund->rbuf_nz;
2601     sbuf_j    = redund->sbuf_j;
2602     sbuf_a    = redund->sbuf_a;
2603     rbuf_j    = redund->rbuf_j;
2604     rbuf_a    = redund->rbuf_a;
2605   }
2606 
2607   if (reuse == MAT_INITIAL_MATRIX) {
2608     PetscInt    nleftover,np_subcomm;
2609 
2610     /* get local size of redundant matrix */
2611     const PetscInt *range;
2612     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2613     rstart_sub = range[nsubcomm*subrank];
2614     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2615       rend_sub = range[nsubcomm*(subrank+1)];
2616     } else {
2617       rend_sub = mat->rmap->N;
2618     }
2619     mloc_sub = rend_sub - rstart_sub;
2620 
2621     /* get the destination processors' id send_rank, nsends and nrecvs */
2622     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2623 
2624     np_subcomm = size/nsubcomm;
2625     nleftover  = size - nsubcomm*np_subcomm;
2626 
2627     nsends = 0; nrecvs = 0;
2628     for (i=0; i<size; i++) {
2629       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2630         send_rank[nsends]   = i; nsends++;
2631         recv_rank[nrecvs++] = i;
2632       }
2633     }
2634     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2635       i = size-nleftover-1;
2636       j = 0;
2637       while (j < nsubcomm - nleftover) {
2638         send_rank[nsends++] = i;
2639         i--; j++;
2640       }
2641     }
2642 
2643     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2644       for (i=0; i<nleftover; i++) {
2645         recv_rank[nrecvs++] = size-nleftover+i;
2646       }
2647     }
2648 
2649     /* allocate sbuf_j, sbuf_a */
2650     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2651     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2652     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2653   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2654 
2655   /* copy mat's local entries into the buffers */
2656   if (reuse == MAT_INITIAL_MATRIX) {
2657     rownz_max = 0;
2658     rptr      = sbuf_j;
2659     cols      = sbuf_j + rend-rstart + 1;
2660     vals      = sbuf_a;
2661     rptr[0]   = 0;
2662     for (i=0; i<rend-rstart; i++) {
2663       row    = i + rstart;
2664       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2665       ncols  = nzA + nzB;
2666       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2667       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2668       /* load the column indices for this row into cols */
2669       lwrite = 0;
2670       for (l=0; l<nzB; l++) {
2671         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2672           vals[lwrite]   = aworkB[l];
2673           cols[lwrite++] = ctmp;
2674         }
2675       }
2676       for (l=0; l<nzA; l++) {
2677         vals[lwrite]   = aworkA[l];
2678         cols[lwrite++] = cstart + cworkA[l];
2679       }
2680       for (l=0; l<nzB; l++) {
2681         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2682           vals[lwrite]   = aworkB[l];
2683           cols[lwrite++] = ctmp;
2684         }
2685       }
2686       vals     += ncols;
2687       cols     += ncols;
2688       rptr[i+1] = rptr[i] + ncols;
2689       if (rownz_max < ncols) rownz_max = ncols;
2690     }
2691     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2692   } else { /* only copy matrix values into sbuf_a */
2693     rptr    = sbuf_j;
2694     vals    = sbuf_a;
2695     rptr[0] = 0;
2696     for (i=0; i<rend-rstart; i++) {
2697       row    = i + rstart;
2698       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2699       ncols  = nzA + nzB;
2700       cworkB = b->j + b->i[i];
2701       aworkA = a->a + a->i[i];
2702       aworkB = b->a + b->i[i];
2703       lwrite = 0;
2704       for (l=0; l<nzB; l++) {
2705         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2706       }
2707       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2708       for (l=0; l<nzB; l++) {
2709         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2710       }
2711       vals     += ncols;
2712       rptr[i+1] = rptr[i] + ncols;
2713     }
2714   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2715 
2716   /* send nzlocal to others, and recv other's nzlocal */
2717   /*--------------------------------------------------*/
2718   if (reuse == MAT_INITIAL_MATRIX) {
2719     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2720 
2721     s_waits2 = s_waits3 + nsends;
2722     s_waits1 = s_waits2 + nsends;
2723     r_waits1 = s_waits1 + nsends;
2724     r_waits2 = r_waits1 + nrecvs;
2725     r_waits3 = r_waits2 + nrecvs;
2726   } else {
2727     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2728 
2729     r_waits3 = s_waits3 + nsends;
2730   }
2731 
2732   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2733   if (reuse == MAT_INITIAL_MATRIX) {
2734     /* get new tags to keep the communication clean */
2735     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2736     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2737     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2738 
2739     /* post receives of other's nzlocal */
2740     for (i=0; i<nrecvs; i++) {
2741       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2742     }
2743     /* send nzlocal to others */
2744     for (i=0; i<nsends; i++) {
2745       sbuf_nz[i] = nzlocal;
2746       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2747     }
2748     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2749     count = nrecvs;
2750     while (count) {
2751       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2752 
2753       recv_rank[imdex] = recv_status.MPI_SOURCE;
2754       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2755       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2756 
2757       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2758 
2759       rbuf_nz[imdex] += i + 2;
2760 
2761       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2762       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2763       count--;
2764     }
2765     /* wait on sends of nzlocal */
2766     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2767     /* send mat->i,j to others, and recv from other's */
2768     /*------------------------------------------------*/
2769     for (i=0; i<nsends; i++) {
2770       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2771       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2772     }
2773     /* wait on receives of mat->i,j */
2774     /*------------------------------*/
2775     count = nrecvs;
2776     while (count) {
2777       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2778       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2779       count--;
2780     }
2781     /* wait on sends of mat->i,j */
2782     /*---------------------------*/
2783     if (nsends) {
2784       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2785     }
2786   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2787 
2788   /* post receives, send and receive mat->a */
2789   /*----------------------------------------*/
2790   for (imdex=0; imdex<nrecvs; imdex++) {
2791     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2792   }
2793   for (i=0; i<nsends; i++) {
2794     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2795   }
2796   count = nrecvs;
2797   while (count) {
2798     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2799     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2800     count--;
2801   }
2802   if (nsends) {
2803     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2804   }
2805 
2806   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2807 
2808   /* create redundant matrix */
2809   /*-------------------------*/
2810   if (reuse == MAT_INITIAL_MATRIX) {
2811     /* compute rownz_max for preallocation */
2812     for (imdex=0; imdex<nrecvs; imdex++) {
2813       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2814       rptr = rbuf_j[imdex];
2815       for (i=0; i<j; i++) {
2816         ncols = rptr[i+1] - rptr[i];
2817         if (rownz_max < ncols) rownz_max = ncols;
2818       }
2819     }
2820 
2821     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2822     ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2823     /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */
2824     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2825     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2826     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2827     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2828   } else {
2829     C = *matredundant;
2830   }
2831 
2832   /* insert local matrix entries */
2833   rptr = sbuf_j;
2834   cols = sbuf_j + rend-rstart + 1;
2835   vals = sbuf_a;
2836   for (i=0; i<rend-rstart; i++) {
2837     row   = i + rstart;
2838     ncols = rptr[i+1] - rptr[i];
2839     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2840     vals += ncols;
2841     cols += ncols;
2842   }
2843   /* insert received matrix entries */
2844   for (imdex=0; imdex<nrecvs; imdex++) {
2845     rstart = rowrange[recv_rank[imdex]];
2846     rend   = rowrange[recv_rank[imdex]+1];
2847     rptr   = rbuf_j[imdex];
2848     cols   = rbuf_j[imdex] + rend-rstart + 1;
2849     vals   = rbuf_a[imdex];
2850     for (i=0; i<rend-rstart; i++) {
2851       row   = i + rstart;
2852       ncols = rptr[i+1] - rptr[i];
2853       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2854       vals += ncols;
2855       cols += ncols;
2856     }
2857   }
2858   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860 
2861   if (reuse == MAT_INITIAL_MATRIX) {
2862     *matredundant = C;
2863 
2864     /* create a supporting struct and attach it to C for reuse */
2865     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2866     if (subsize == 1) {
2867       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2868       c->redundant = redund;
2869     } else {
2870       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2871       c->redundant = redund;
2872     }
2873 
2874     redund->nzlocal   = nzlocal;
2875     redund->nsends    = nsends;
2876     redund->nrecvs    = nrecvs;
2877     redund->send_rank = send_rank;
2878     redund->recv_rank = recv_rank;
2879     redund->sbuf_nz   = sbuf_nz;
2880     redund->rbuf_nz   = rbuf_nz;
2881     redund->sbuf_j    = sbuf_j;
2882     redund->sbuf_a    = sbuf_a;
2883     redund->rbuf_j    = rbuf_j;
2884     redund->rbuf_a    = rbuf_a;
2885 
2886     redund->Destroy = C->ops->destroy;
2887     C->ops->destroy = MatDestroy_MatRedundant;
2888   }
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2894 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2895 {
2896   PetscErrorCode ierr;
2897   MPI_Comm       comm;
2898   PetscMPIInt    size;
2899 
2900   PetscFunctionBegin;
2901   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2902   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2903   /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */
2904   if (!subcomm || nsubcomm == size) { /* create psubcomm */
2905     PetscSubcomm psubcomm;
2906     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2907     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2908     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2909     ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2910     /* destroy psubcomm? */
2911   } else {
2912     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2913   }
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2919 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2920 {
2921   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2922   PetscErrorCode ierr;
2923   PetscInt       i,*idxb = 0;
2924   PetscScalar    *va,*vb;
2925   Vec            vtmp;
2926 
2927   PetscFunctionBegin;
2928   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2929   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2930   if (idx) {
2931     for (i=0; i<A->rmap->n; i++) {
2932       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2933     }
2934   }
2935 
2936   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2937   if (idx) {
2938     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2939   }
2940   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2941   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2942 
2943   for (i=0; i<A->rmap->n; i++) {
2944     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2945       va[i] = vb[i];
2946       if (idx) idx[i] = a->garray[idxb[i]];
2947     }
2948   }
2949 
2950   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2951   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2952   ierr = PetscFree(idxb);CHKERRQ(ierr);
2953   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2959 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2960 {
2961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2962   PetscErrorCode ierr;
2963   PetscInt       i,*idxb = 0;
2964   PetscScalar    *va,*vb;
2965   Vec            vtmp;
2966 
2967   PetscFunctionBegin;
2968   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2969   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2970   if (idx) {
2971     for (i=0; i<A->cmap->n; i++) {
2972       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2973     }
2974   }
2975 
2976   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2977   if (idx) {
2978     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2979   }
2980   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2981   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2982 
2983   for (i=0; i<A->rmap->n; i++) {
2984     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2985       va[i] = vb[i];
2986       if (idx) idx[i] = a->garray[idxb[i]];
2987     }
2988   }
2989 
2990   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2991   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2992   ierr = PetscFree(idxb);CHKERRQ(ierr);
2993   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2999 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3000 {
3001   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3002   PetscInt       n      = A->rmap->n;
3003   PetscInt       cstart = A->cmap->rstart;
3004   PetscInt       *cmap  = mat->garray;
3005   PetscInt       *diagIdx, *offdiagIdx;
3006   Vec            diagV, offdiagV;
3007   PetscScalar    *a, *diagA, *offdiagA;
3008   PetscInt       r;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3013   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3014   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3015   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3016   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3017   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3018   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3019   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3020   for (r = 0; r < n; ++r) {
3021     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3022       a[r]   = diagA[r];
3023       idx[r] = cstart + diagIdx[r];
3024     } else {
3025       a[r]   = offdiagA[r];
3026       idx[r] = cmap[offdiagIdx[r]];
3027     }
3028   }
3029   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3030   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3031   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3032   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3033   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3034   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3040 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3041 {
3042   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3043   PetscInt       n      = A->rmap->n;
3044   PetscInt       cstart = A->cmap->rstart;
3045   PetscInt       *cmap  = mat->garray;
3046   PetscInt       *diagIdx, *offdiagIdx;
3047   Vec            diagV, offdiagV;
3048   PetscScalar    *a, *diagA, *offdiagA;
3049   PetscInt       r;
3050   PetscErrorCode ierr;
3051 
3052   PetscFunctionBegin;
3053   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3054   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3055   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3056   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3057   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3058   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3059   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3060   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3061   for (r = 0; r < n; ++r) {
3062     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3063       a[r]   = diagA[r];
3064       idx[r] = cstart + diagIdx[r];
3065     } else {
3066       a[r]   = offdiagA[r];
3067       idx[r] = cmap[offdiagIdx[r]];
3068     }
3069   }
3070   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3071   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3072   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3073   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3074   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3075   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 #undef __FUNCT__
3080 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3081 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3082 {
3083   PetscErrorCode ierr;
3084   Mat            *dummy;
3085 
3086   PetscFunctionBegin;
3087   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3088   *newmat = *dummy;
3089   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3097 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3098 {
3099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3100   PetscErrorCode ierr;
3101 
3102   PetscFunctionBegin;
3103   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 #undef __FUNCT__
3108 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3109 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3110 {
3111   PetscErrorCode ierr;
3112   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3113 
3114   PetscFunctionBegin;
3115   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3116   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3117   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3118   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 /* -------------------------------------------------------------------*/
3123 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3124                                        MatGetRow_MPIAIJ,
3125                                        MatRestoreRow_MPIAIJ,
3126                                        MatMult_MPIAIJ,
3127                                 /* 4*/ MatMultAdd_MPIAIJ,
3128                                        MatMultTranspose_MPIAIJ,
3129                                        MatMultTransposeAdd_MPIAIJ,
3130 #if defined(PETSC_HAVE_PBGL)
3131                                        MatSolve_MPIAIJ,
3132 #else
3133                                        0,
3134 #endif
3135                                        0,
3136                                        0,
3137                                 /*10*/ 0,
3138                                        0,
3139                                        0,
3140                                        MatSOR_MPIAIJ,
3141                                        MatTranspose_MPIAIJ,
3142                                 /*15*/ MatGetInfo_MPIAIJ,
3143                                        MatEqual_MPIAIJ,
3144                                        MatGetDiagonal_MPIAIJ,
3145                                        MatDiagonalScale_MPIAIJ,
3146                                        MatNorm_MPIAIJ,
3147                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3148                                        MatAssemblyEnd_MPIAIJ,
3149                                        MatSetOption_MPIAIJ,
3150                                        MatZeroEntries_MPIAIJ,
3151                                 /*24*/ MatZeroRows_MPIAIJ,
3152                                        0,
3153 #if defined(PETSC_HAVE_PBGL)
3154                                        0,
3155 #else
3156                                        0,
3157 #endif
3158                                        0,
3159                                        0,
3160                                 /*29*/ MatSetUp_MPIAIJ,
3161 #if defined(PETSC_HAVE_PBGL)
3162                                        0,
3163 #else
3164                                        0,
3165 #endif
3166                                        0,
3167                                        0,
3168                                        0,
3169                                 /*34*/ MatDuplicate_MPIAIJ,
3170                                        0,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                 /*39*/ MatAXPY_MPIAIJ,
3175                                        MatGetSubMatrices_MPIAIJ,
3176                                        MatIncreaseOverlap_MPIAIJ,
3177                                        MatGetValues_MPIAIJ,
3178                                        MatCopy_MPIAIJ,
3179                                 /*44*/ MatGetRowMax_MPIAIJ,
3180                                        MatScale_MPIAIJ,
3181                                        0,
3182                                        0,
3183                                        MatZeroRowsColumns_MPIAIJ,
3184                                 /*49*/ MatSetRandom_MPIAIJ,
3185                                        0,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3190                                        0,
3191                                        MatSetUnfactored_MPIAIJ,
3192                                        MatPermute_MPIAIJ,
3193                                        0,
3194                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3195                                        MatDestroy_MPIAIJ,
3196                                        MatView_MPIAIJ,
3197                                        0,
3198                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3199                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3200                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3201                                        0,
3202                                        0,
3203                                        0,
3204                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3205                                        MatGetRowMinAbs_MPIAIJ,
3206                                        0,
3207                                        MatSetColoring_MPIAIJ,
3208                                        0,
3209                                        MatSetValuesAdifor_MPIAIJ,
3210                                 /*75*/ MatFDColoringApply_AIJ,
3211                                        0,
3212                                        0,
3213                                        0,
3214                                        MatFindZeroDiagonals_MPIAIJ,
3215                                 /*80*/ 0,
3216                                        0,
3217                                        0,
3218                                 /*83*/ MatLoad_MPIAIJ,
3219                                        0,
3220                                        0,
3221                                        0,
3222                                        0,
3223                                        0,
3224                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3225                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3226                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3227                                        MatPtAP_MPIAIJ_MPIAIJ,
3228                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3229                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3230                                        0,
3231                                        0,
3232                                        0,
3233                                        0,
3234                                 /*99*/ 0,
3235                                        0,
3236                                        0,
3237                                        MatConjugate_MPIAIJ,
3238                                        0,
3239                                 /*104*/MatSetValuesRow_MPIAIJ,
3240                                        MatRealPart_MPIAIJ,
3241                                        MatImaginaryPart_MPIAIJ,
3242                                        0,
3243                                        0,
3244                                 /*109*/0,
3245                                        MatGetRedundantMatrix_MPIAIJ,
3246                                        MatGetRowMin_MPIAIJ,
3247                                        0,
3248                                        0,
3249                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3250                                        0,
3251                                        0,
3252                                        0,
3253                                        0,
3254                                 /*119*/0,
3255                                        0,
3256                                        0,
3257                                        0,
3258                                        MatGetMultiProcBlock_MPIAIJ,
3259                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3260                                        MatGetColumnNorms_MPIAIJ,
3261                                        MatInvertBlockDiagonal_MPIAIJ,
3262                                        0,
3263                                        MatGetSubMatricesParallel_MPIAIJ,
3264                                 /*129*/0,
3265                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3266                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3267                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3268                                        0,
3269                                 /*134*/0,
3270                                        0,
3271                                        0,
3272                                        0,
3273                                        0,
3274                                 /*139*/0,
3275                                        0
3276 };
3277 
3278 /* ----------------------------------------------------------------------------------------*/
3279 
3280 #undef __FUNCT__
3281 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3282 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3283 {
3284   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3285   PetscErrorCode ierr;
3286 
3287   PetscFunctionBegin;
3288   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3289   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3290   PetscFunctionReturn(0);
3291 }
3292 
3293 #undef __FUNCT__
3294 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3295 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3296 {
3297   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3298   PetscErrorCode ierr;
3299 
3300   PetscFunctionBegin;
3301   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3302   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3303   PetscFunctionReturn(0);
3304 }
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3308 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3309 {
3310   Mat_MPIAIJ     *b;
3311   PetscErrorCode ierr;
3312 
3313   PetscFunctionBegin;
3314   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3315   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3316   b = (Mat_MPIAIJ*)B->data;
3317 
3318   if (!B->preallocated) {
3319     /* Explicitly create 2 MATSEQAIJ matrices. */
3320     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3321     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3322     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3323     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3324     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3325     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3326     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3327     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3328     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3329     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3330   }
3331 
3332   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3333   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3334   B->preallocated = PETSC_TRUE;
3335   PetscFunctionReturn(0);
3336 }
3337 
3338 #undef __FUNCT__
3339 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3340 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3341 {
3342   Mat            mat;
3343   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3344   PetscErrorCode ierr;
3345 
3346   PetscFunctionBegin;
3347   *newmat = 0;
3348   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3349   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3350   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3351   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3352   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3353   a       = (Mat_MPIAIJ*)mat->data;
3354 
3355   mat->factortype   = matin->factortype;
3356   mat->rmap->bs     = matin->rmap->bs;
3357   mat->cmap->bs     = matin->cmap->bs;
3358   mat->assembled    = PETSC_TRUE;
3359   mat->insertmode   = NOT_SET_VALUES;
3360   mat->preallocated = PETSC_TRUE;
3361 
3362   a->size         = oldmat->size;
3363   a->rank         = oldmat->rank;
3364   a->donotstash   = oldmat->donotstash;
3365   a->roworiented  = oldmat->roworiented;
3366   a->rowindices   = 0;
3367   a->rowvalues    = 0;
3368   a->getrowactive = PETSC_FALSE;
3369 
3370   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3371   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3372 
3373   if (oldmat->colmap) {
3374 #if defined(PETSC_USE_CTABLE)
3375     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3376 #else
3377     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3378     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3379     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3380 #endif
3381   } else a->colmap = 0;
3382   if (oldmat->garray) {
3383     PetscInt len;
3384     len  = oldmat->B->cmap->n;
3385     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3386     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3387     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3388   } else a->garray = 0;
3389 
3390   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3391   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3392   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3393   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3394   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3395   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3396   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3397   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3398   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3399   *newmat = mat;
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatLoad_MPIAIJ"
3407 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3408 {
3409   PetscScalar    *vals,*svals;
3410   MPI_Comm       comm;
3411   PetscErrorCode ierr;
3412   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3413   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3414   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3415   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3416   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3417   int            fd;
3418   PetscInt       bs = 1;
3419 
3420   PetscFunctionBegin;
3421   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3422   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3423   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3424   if (!rank) {
3425     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3426     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3427     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3428   }
3429 
3430   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3431   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3432   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3433 
3434   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3435 
3436   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3437   M    = header[1]; N = header[2];
3438   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3439   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3440   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3441 
3442   /* If global sizes are set, check if they are consistent with that given in the file */
3443   if (sizesset) {
3444     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3445   }
3446   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3447   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3448 
3449   /* determine ownership of all (block) rows */
3450   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3451   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3452   else m = newMat->rmap->n; /* Set by user */
3453 
3454   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3455   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3456 
3457   /* First process needs enough room for process with most rows */
3458   if (!rank) {
3459     mmax = rowners[1];
3460     for (i=2; i<=size; i++) {
3461       mmax = PetscMax(mmax, rowners[i]);
3462     }
3463   } else mmax = -1;             /* unused, but compilers complain */
3464 
3465   rowners[0] = 0;
3466   for (i=2; i<=size; i++) {
3467     rowners[i] += rowners[i-1];
3468   }
3469   rstart = rowners[rank];
3470   rend   = rowners[rank+1];
3471 
3472   /* distribute row lengths to all processors */
3473   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3474   if (!rank) {
3475     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3476     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3477     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3478     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3479     for (j=0; j<m; j++) {
3480       procsnz[0] += ourlens[j];
3481     }
3482     for (i=1; i<size; i++) {
3483       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3484       /* calculate the number of nonzeros on each processor */
3485       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3486         procsnz[i] += rowlengths[j];
3487       }
3488       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3489     }
3490     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3491   } else {
3492     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3493   }
3494 
3495   if (!rank) {
3496     /* determine max buffer needed and allocate it */
3497     maxnz = 0;
3498     for (i=0; i<size; i++) {
3499       maxnz = PetscMax(maxnz,procsnz[i]);
3500     }
3501     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3502 
3503     /* read in my part of the matrix column indices  */
3504     nz   = procsnz[0];
3505     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3506     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3507 
3508     /* read in every one elses and ship off */
3509     for (i=1; i<size; i++) {
3510       nz   = procsnz[i];
3511       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3512       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3513     }
3514     ierr = PetscFree(cols);CHKERRQ(ierr);
3515   } else {
3516     /* determine buffer space needed for message */
3517     nz = 0;
3518     for (i=0; i<m; i++) {
3519       nz += ourlens[i];
3520     }
3521     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3522 
3523     /* receive message of column indices*/
3524     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3525   }
3526 
3527   /* determine column ownership if matrix is not square */
3528   if (N != M) {
3529     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3530     else n = newMat->cmap->n;
3531     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3532     cstart = cend - n;
3533   } else {
3534     cstart = rstart;
3535     cend   = rend;
3536     n      = cend - cstart;
3537   }
3538 
3539   /* loop over local rows, determining number of off diagonal entries */
3540   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3541   jj   = 0;
3542   for (i=0; i<m; i++) {
3543     for (j=0; j<ourlens[i]; j++) {
3544       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3545       jj++;
3546     }
3547   }
3548 
3549   for (i=0; i<m; i++) {
3550     ourlens[i] -= offlens[i];
3551   }
3552   if (!sizesset) {
3553     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3554   }
3555 
3556   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3557 
3558   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3559 
3560   for (i=0; i<m; i++) {
3561     ourlens[i] += offlens[i];
3562   }
3563 
3564   if (!rank) {
3565     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3566 
3567     /* read in my part of the matrix numerical values  */
3568     nz   = procsnz[0];
3569     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3570 
3571     /* insert into matrix */
3572     jj      = rstart;
3573     smycols = mycols;
3574     svals   = vals;
3575     for (i=0; i<m; i++) {
3576       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3577       smycols += ourlens[i];
3578       svals   += ourlens[i];
3579       jj++;
3580     }
3581 
3582     /* read in other processors and ship out */
3583     for (i=1; i<size; i++) {
3584       nz   = procsnz[i];
3585       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3586       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3587     }
3588     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3589   } else {
3590     /* receive numeric values */
3591     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3592 
3593     /* receive message of values*/
3594     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3595 
3596     /* insert into matrix */
3597     jj      = rstart;
3598     smycols = mycols;
3599     svals   = vals;
3600     for (i=0; i<m; i++) {
3601       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3602       smycols += ourlens[i];
3603       svals   += ourlens[i];
3604       jj++;
3605     }
3606   }
3607   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3608   ierr = PetscFree(vals);CHKERRQ(ierr);
3609   ierr = PetscFree(mycols);CHKERRQ(ierr);
3610   ierr = PetscFree(rowners);CHKERRQ(ierr);
3611   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613   PetscFunctionReturn(0);
3614 }
3615 
3616 #undef __FUNCT__
3617 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3618 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3619 {
3620   PetscErrorCode ierr;
3621   IS             iscol_local;
3622   PetscInt       csize;
3623 
3624   PetscFunctionBegin;
3625   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3626   if (call == MAT_REUSE_MATRIX) {
3627     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3628     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3629   } else {
3630     PetscInt cbs;
3631     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3632     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3633     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3634   }
3635   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3636   if (call == MAT_INITIAL_MATRIX) {
3637     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3638     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3639   }
3640   PetscFunctionReturn(0);
3641 }
3642 
3643 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3646 /*
3647     Not great since it makes two copies of the submatrix, first an SeqAIJ
3648   in local and then by concatenating the local matrices the end result.
3649   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3650 
3651   Note: This requires a sequential iscol with all indices.
3652 */
3653 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3654 {
3655   PetscErrorCode ierr;
3656   PetscMPIInt    rank,size;
3657   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3658   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3659   PetscBool      allcolumns, colflag;
3660   Mat            M,Mreuse;
3661   MatScalar      *vwork,*aa;
3662   MPI_Comm       comm;
3663   Mat_SeqAIJ     *aij;
3664 
3665   PetscFunctionBegin;
3666   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3668   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3669 
3670   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3671   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3672   if (colflag && ncol == mat->cmap->N) {
3673     allcolumns = PETSC_TRUE;
3674   } else {
3675     allcolumns = PETSC_FALSE;
3676   }
3677   if (call ==  MAT_REUSE_MATRIX) {
3678     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3679     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3680     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3681   } else {
3682     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3683   }
3684 
3685   /*
3686       m - number of local rows
3687       n - number of columns (same on all processors)
3688       rstart - first row in new global matrix generated
3689   */
3690   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3691   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3692   if (call == MAT_INITIAL_MATRIX) {
3693     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3694     ii  = aij->i;
3695     jj  = aij->j;
3696 
3697     /*
3698         Determine the number of non-zeros in the diagonal and off-diagonal
3699         portions of the matrix in order to do correct preallocation
3700     */
3701 
3702     /* first get start and end of "diagonal" columns */
3703     if (csize == PETSC_DECIDE) {
3704       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3705       if (mglobal == n) { /* square matrix */
3706         nlocal = m;
3707       } else {
3708         nlocal = n/size + ((n % size) > rank);
3709       }
3710     } else {
3711       nlocal = csize;
3712     }
3713     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3714     rstart = rend - nlocal;
3715     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3716 
3717     /* next, compute all the lengths */
3718     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3719     olens = dlens + m;
3720     for (i=0; i<m; i++) {
3721       jend = ii[i+1] - ii[i];
3722       olen = 0;
3723       dlen = 0;
3724       for (j=0; j<jend; j++) {
3725         if (*jj < rstart || *jj >= rend) olen++;
3726         else dlen++;
3727         jj++;
3728       }
3729       olens[i] = olen;
3730       dlens[i] = dlen;
3731     }
3732     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3733     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3734     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3735     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3736     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3737     ierr = PetscFree(dlens);CHKERRQ(ierr);
3738   } else {
3739     PetscInt ml,nl;
3740 
3741     M    = *newmat;
3742     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3743     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3744     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3745     /*
3746          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3747        rather than the slower MatSetValues().
3748     */
3749     M->was_assembled = PETSC_TRUE;
3750     M->assembled     = PETSC_FALSE;
3751   }
3752   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3753   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3754   ii   = aij->i;
3755   jj   = aij->j;
3756   aa   = aij->a;
3757   for (i=0; i<m; i++) {
3758     row   = rstart + i;
3759     nz    = ii[i+1] - ii[i];
3760     cwork = jj;     jj += nz;
3761     vwork = aa;     aa += nz;
3762     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3763   }
3764 
3765   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767   *newmat = M;
3768 
3769   /* save submatrix used in processor for next request */
3770   if (call ==  MAT_INITIAL_MATRIX) {
3771     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3772     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3773   }
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3779 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3780 {
3781   PetscInt       m,cstart, cend,j,nnz,i,d;
3782   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3783   const PetscInt *JJ;
3784   PetscScalar    *values;
3785   PetscErrorCode ierr;
3786 
3787   PetscFunctionBegin;
3788   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3789 
3790   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3791   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3792   m      = B->rmap->n;
3793   cstart = B->cmap->rstart;
3794   cend   = B->cmap->rend;
3795   rstart = B->rmap->rstart;
3796 
3797   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3798 
3799 #if defined(PETSC_USE_DEBUGGING)
3800   for (i=0; i<m; i++) {
3801     nnz = Ii[i+1]- Ii[i];
3802     JJ  = J + Ii[i];
3803     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3804     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3805     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3806   }
3807 #endif
3808 
3809   for (i=0; i<m; i++) {
3810     nnz     = Ii[i+1]- Ii[i];
3811     JJ      = J + Ii[i];
3812     nnz_max = PetscMax(nnz_max,nnz);
3813     d       = 0;
3814     for (j=0; j<nnz; j++) {
3815       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3816     }
3817     d_nnz[i] = d;
3818     o_nnz[i] = nnz - d;
3819   }
3820   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3821   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3822 
3823   if (v) values = (PetscScalar*)v;
3824   else {
3825     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3826     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3827   }
3828 
3829   for (i=0; i<m; i++) {
3830     ii   = i + rstart;
3831     nnz  = Ii[i+1]- Ii[i];
3832     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3833   }
3834   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3836 
3837   if (!v) {
3838     ierr = PetscFree(values);CHKERRQ(ierr);
3839   }
3840   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 #undef __FUNCT__
3845 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3846 /*@
3847    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3848    (the default parallel PETSc format).
3849 
3850    Collective on MPI_Comm
3851 
3852    Input Parameters:
3853 +  B - the matrix
3854 .  i - the indices into j for the start of each local row (starts with zero)
3855 .  j - the column indices for each local row (starts with zero)
3856 -  v - optional values in the matrix
3857 
3858    Level: developer
3859 
3860    Notes:
3861        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3862      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3863      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3864 
3865        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3866 
3867        The format which is used for the sparse matrix input, is equivalent to a
3868     row-major ordering.. i.e for the following matrix, the input data expected is
3869     as shown:
3870 
3871         1 0 0
3872         2 0 3     P0
3873        -------
3874         4 5 6     P1
3875 
3876      Process0 [P0]: rows_owned=[0,1]
3877         i =  {0,1,3}  [size = nrow+1  = 2+1]
3878         j =  {0,0,2}  [size = nz = 6]
3879         v =  {1,2,3}  [size = nz = 6]
3880 
3881      Process1 [P1]: rows_owned=[2]
3882         i =  {0,3}    [size = nrow+1  = 1+1]
3883         j =  {0,1,2}  [size = nz = 6]
3884         v =  {4,5,6}  [size = nz = 6]
3885 
3886 .keywords: matrix, aij, compressed row, sparse, parallel
3887 
3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3889           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3890 @*/
3891 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3892 {
3893   PetscErrorCode ierr;
3894 
3895   PetscFunctionBegin;
3896   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3902 /*@C
3903    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3904    (the default parallel PETSc format).  For good matrix assembly performance
3905    the user should preallocate the matrix storage by setting the parameters
3906    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3907    performance can be increased by more than a factor of 50.
3908 
3909    Collective on MPI_Comm
3910 
3911    Input Parameters:
3912 +  A - the matrix
3913 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3914            (same value is used for all local rows)
3915 .  d_nnz - array containing the number of nonzeros in the various rows of the
3916            DIAGONAL portion of the local submatrix (possibly different for each row)
3917            or NULL, if d_nz is used to specify the nonzero structure.
3918            The size of this array is equal to the number of local rows, i.e 'm'.
3919            For matrices that will be factored, you must leave room for (and set)
3920            the diagonal entry even if it is zero.
3921 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3922            submatrix (same value is used for all local rows).
3923 -  o_nnz - array containing the number of nonzeros in the various rows of the
3924            OFF-DIAGONAL portion of the local submatrix (possibly different for
3925            each row) or NULL, if o_nz is used to specify the nonzero
3926            structure. The size of this array is equal to the number
3927            of local rows, i.e 'm'.
3928 
3929    If the *_nnz parameter is given then the *_nz parameter is ignored
3930 
3931    The AIJ format (also called the Yale sparse matrix format or
3932    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3933    storage.  The stored row and column indices begin with zero.
3934    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3935 
3936    The parallel matrix is partitioned such that the first m0 rows belong to
3937    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3938    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3939 
3940    The DIAGONAL portion of the local submatrix of a processor can be defined
3941    as the submatrix which is obtained by extraction the part corresponding to
3942    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3943    first row that belongs to the processor, r2 is the last row belonging to
3944    the this processor, and c1-c2 is range of indices of the local part of a
3945    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3946    common case of a square matrix, the row and column ranges are the same and
3947    the DIAGONAL part is also square. The remaining portion of the local
3948    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3949 
3950    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3951 
3952    You can call MatGetInfo() to get information on how effective the preallocation was;
3953    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3954    You can also run with the option -info and look for messages with the string
3955    malloc in them to see if additional memory allocation was needed.
3956 
3957    Example usage:
3958 
3959    Consider the following 8x8 matrix with 34 non-zero values, that is
3960    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3961    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3962    as follows:
3963 
3964 .vb
3965             1  2  0  |  0  3  0  |  0  4
3966     Proc0   0  5  6  |  7  0  0  |  8  0
3967             9  0 10  | 11  0  0  | 12  0
3968     -------------------------------------
3969            13  0 14  | 15 16 17  |  0  0
3970     Proc1   0 18  0  | 19 20 21  |  0  0
3971             0  0  0  | 22 23  0  | 24  0
3972     -------------------------------------
3973     Proc2  25 26 27  |  0  0 28  | 29  0
3974            30  0  0  | 31 32 33  |  0 34
3975 .ve
3976 
3977    This can be represented as a collection of submatrices as:
3978 
3979 .vb
3980       A B C
3981       D E F
3982       G H I
3983 .ve
3984 
3985    Where the submatrices A,B,C are owned by proc0, D,E,F are
3986    owned by proc1, G,H,I are owned by proc2.
3987 
3988    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3989    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3990    The 'M','N' parameters are 8,8, and have the same values on all procs.
3991 
3992    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3993    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3994    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3995    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3996    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3997    matrix, ans [DF] as another SeqAIJ matrix.
3998 
3999    When d_nz, o_nz parameters are specified, d_nz storage elements are
4000    allocated for every row of the local diagonal submatrix, and o_nz
4001    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4002    One way to choose d_nz and o_nz is to use the max nonzerors per local
4003    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4004    In this case, the values of d_nz,o_nz are:
4005 .vb
4006      proc0 : dnz = 2, o_nz = 2
4007      proc1 : dnz = 3, o_nz = 2
4008      proc2 : dnz = 1, o_nz = 4
4009 .ve
4010    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4011    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4012    for proc3. i.e we are using 12+15+10=37 storage locations to store
4013    34 values.
4014 
4015    When d_nnz, o_nnz parameters are specified, the storage is specified
4016    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4017    In the above case the values for d_nnz,o_nnz are:
4018 .vb
4019      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4020      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4021      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4022 .ve
4023    Here the space allocated is sum of all the above values i.e 34, and
4024    hence pre-allocation is perfect.
4025 
4026    Level: intermediate
4027 
4028 .keywords: matrix, aij, compressed row, sparse, parallel
4029 
4030 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4031           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4032 @*/
4033 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4034 {
4035   PetscErrorCode ierr;
4036 
4037   PetscFunctionBegin;
4038   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4039   PetscValidType(B,1);
4040   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 #undef __FUNCT__
4045 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4046 /*@
4047      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4048          CSR format the local rows.
4049 
4050    Collective on MPI_Comm
4051 
4052    Input Parameters:
4053 +  comm - MPI communicator
4054 .  m - number of local rows (Cannot be PETSC_DECIDE)
4055 .  n - This value should be the same as the local size used in creating the
4056        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4057        calculated if N is given) For square matrices n is almost always m.
4058 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4059 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4060 .   i - row indices
4061 .   j - column indices
4062 -   a - matrix values
4063 
4064    Output Parameter:
4065 .   mat - the matrix
4066 
4067    Level: intermediate
4068 
4069    Notes:
4070        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4071      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4072      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4073 
4074        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4075 
4076        The format which is used for the sparse matrix input, is equivalent to a
4077     row-major ordering.. i.e for the following matrix, the input data expected is
4078     as shown:
4079 
4080         1 0 0
4081         2 0 3     P0
4082        -------
4083         4 5 6     P1
4084 
4085      Process0 [P0]: rows_owned=[0,1]
4086         i =  {0,1,3}  [size = nrow+1  = 2+1]
4087         j =  {0,0,2}  [size = nz = 6]
4088         v =  {1,2,3}  [size = nz = 6]
4089 
4090      Process1 [P1]: rows_owned=[2]
4091         i =  {0,3}    [size = nrow+1  = 1+1]
4092         j =  {0,1,2}  [size = nz = 6]
4093         v =  {4,5,6}  [size = nz = 6]
4094 
4095 .keywords: matrix, aij, compressed row, sparse, parallel
4096 
4097 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4098           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4099 @*/
4100 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4101 {
4102   PetscErrorCode ierr;
4103 
4104   PetscFunctionBegin;
4105   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4106   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4107   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4108   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4109   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4110   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4111   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4112   PetscFunctionReturn(0);
4113 }
4114 
4115 #undef __FUNCT__
4116 #define __FUNCT__ "MatCreateAIJ"
4117 /*@C
4118    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4119    (the default parallel PETSc format).  For good matrix assembly performance
4120    the user should preallocate the matrix storage by setting the parameters
4121    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4122    performance can be increased by more than a factor of 50.
4123 
4124    Collective on MPI_Comm
4125 
4126    Input Parameters:
4127 +  comm - MPI communicator
4128 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4129            This value should be the same as the local size used in creating the
4130            y vector for the matrix-vector product y = Ax.
4131 .  n - This value should be the same as the local size used in creating the
4132        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4133        calculated if N is given) For square matrices n is almost always m.
4134 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4135 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4136 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4137            (same value is used for all local rows)
4138 .  d_nnz - array containing the number of nonzeros in the various rows of the
4139            DIAGONAL portion of the local submatrix (possibly different for each row)
4140            or NULL, if d_nz is used to specify the nonzero structure.
4141            The size of this array is equal to the number of local rows, i.e 'm'.
4142 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4143            submatrix (same value is used for all local rows).
4144 -  o_nnz - array containing the number of nonzeros in the various rows of the
4145            OFF-DIAGONAL portion of the local submatrix (possibly different for
4146            each row) or NULL, if o_nz is used to specify the nonzero
4147            structure. The size of this array is equal to the number
4148            of local rows, i.e 'm'.
4149 
4150    Output Parameter:
4151 .  A - the matrix
4152 
4153    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4154    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4155    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4156 
4157    Notes:
4158    If the *_nnz parameter is given then the *_nz parameter is ignored
4159 
4160    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4161    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4162    storage requirements for this matrix.
4163 
4164    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4165    processor than it must be used on all processors that share the object for
4166    that argument.
4167 
4168    The user MUST specify either the local or global matrix dimensions
4169    (possibly both).
4170 
4171    The parallel matrix is partitioned across processors such that the
4172    first m0 rows belong to process 0, the next m1 rows belong to
4173    process 1, the next m2 rows belong to process 2 etc.. where
4174    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4175    values corresponding to [m x N] submatrix.
4176 
4177    The columns are logically partitioned with the n0 columns belonging
4178    to 0th partition, the next n1 columns belonging to the next
4179    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4180 
4181    The DIAGONAL portion of the local submatrix on any given processor
4182    is the submatrix corresponding to the rows and columns m,n
4183    corresponding to the given processor. i.e diagonal matrix on
4184    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4185    etc. The remaining portion of the local submatrix [m x (N-n)]
4186    constitute the OFF-DIAGONAL portion. The example below better
4187    illustrates this concept.
4188 
4189    For a square global matrix we define each processor's diagonal portion
4190    to be its local rows and the corresponding columns (a square submatrix);
4191    each processor's off-diagonal portion encompasses the remainder of the
4192    local matrix (a rectangular submatrix).
4193 
4194    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4195 
4196    When calling this routine with a single process communicator, a matrix of
4197    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4198    type of communicator, use the construction mechanism:
4199      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4200 
4201    By default, this format uses inodes (identical nodes) when possible.
4202    We search for consecutive rows with the same nonzero structure, thereby
4203    reusing matrix information to achieve increased efficiency.
4204 
4205    Options Database Keys:
4206 +  -mat_no_inode  - Do not use inodes
4207 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4208 -  -mat_aij_oneindex - Internally use indexing starting at 1
4209         rather than 0.  Note that when calling MatSetValues(),
4210         the user still MUST index entries starting at 0!
4211 
4212 
4213    Example usage:
4214 
4215    Consider the following 8x8 matrix with 34 non-zero values, that is
4216    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4217    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4218    as follows:
4219 
4220 .vb
4221             1  2  0  |  0  3  0  |  0  4
4222     Proc0   0  5  6  |  7  0  0  |  8  0
4223             9  0 10  | 11  0  0  | 12  0
4224     -------------------------------------
4225            13  0 14  | 15 16 17  |  0  0
4226     Proc1   0 18  0  | 19 20 21  |  0  0
4227             0  0  0  | 22 23  0  | 24  0
4228     -------------------------------------
4229     Proc2  25 26 27  |  0  0 28  | 29  0
4230            30  0  0  | 31 32 33  |  0 34
4231 .ve
4232 
4233    This can be represented as a collection of submatrices as:
4234 
4235 .vb
4236       A B C
4237       D E F
4238       G H I
4239 .ve
4240 
4241    Where the submatrices A,B,C are owned by proc0, D,E,F are
4242    owned by proc1, G,H,I are owned by proc2.
4243 
4244    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4245    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4246    The 'M','N' parameters are 8,8, and have the same values on all procs.
4247 
4248    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4249    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4250    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4251    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4252    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4253    matrix, ans [DF] as another SeqAIJ matrix.
4254 
4255    When d_nz, o_nz parameters are specified, d_nz storage elements are
4256    allocated for every row of the local diagonal submatrix, and o_nz
4257    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4258    One way to choose d_nz and o_nz is to use the max nonzerors per local
4259    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4260    In this case, the values of d_nz,o_nz are:
4261 .vb
4262      proc0 : dnz = 2, o_nz = 2
4263      proc1 : dnz = 3, o_nz = 2
4264      proc2 : dnz = 1, o_nz = 4
4265 .ve
4266    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4267    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4268    for proc3. i.e we are using 12+15+10=37 storage locations to store
4269    34 values.
4270 
4271    When d_nnz, o_nnz parameters are specified, the storage is specified
4272    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4273    In the above case the values for d_nnz,o_nnz are:
4274 .vb
4275      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4276      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4277      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4278 .ve
4279    Here the space allocated is sum of all the above values i.e 34, and
4280    hence pre-allocation is perfect.
4281 
4282    Level: intermediate
4283 
4284 .keywords: matrix, aij, compressed row, sparse, parallel
4285 
4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287           MPIAIJ, MatCreateMPIAIJWithArrays()
4288 @*/
4289 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4290 {
4291   PetscErrorCode ierr;
4292   PetscMPIInt    size;
4293 
4294   PetscFunctionBegin;
4295   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4296   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4297   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4298   if (size > 1) {
4299     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4300     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4301   } else {
4302     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4303     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4304   }
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4310 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4311 {
4312   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4313 
4314   PetscFunctionBegin;
4315   *Ad     = a->A;
4316   *Ao     = a->B;
4317   *colmap = a->garray;
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4323 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4324 {
4325   PetscErrorCode ierr;
4326   PetscInt       i;
4327   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4328 
4329   PetscFunctionBegin;
4330   if (coloring->ctype == IS_COLORING_GLOBAL) {
4331     ISColoringValue *allcolors,*colors;
4332     ISColoring      ocoloring;
4333 
4334     /* set coloring for diagonal portion */
4335     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4336 
4337     /* set coloring for off-diagonal portion */
4338     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4339     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4340     for (i=0; i<a->B->cmap->n; i++) {
4341       colors[i] = allcolors[a->garray[i]];
4342     }
4343     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4344     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4345     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4346     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4347   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4348     ISColoringValue *colors;
4349     PetscInt        *larray;
4350     ISColoring      ocoloring;
4351 
4352     /* set coloring for diagonal portion */
4353     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4354     for (i=0; i<a->A->cmap->n; i++) {
4355       larray[i] = i + A->cmap->rstart;
4356     }
4357     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4358     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4359     for (i=0; i<a->A->cmap->n; i++) {
4360       colors[i] = coloring->colors[larray[i]];
4361     }
4362     ierr = PetscFree(larray);CHKERRQ(ierr);
4363     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4364     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4365     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4366 
4367     /* set coloring for off-diagonal portion */
4368     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4369     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4370     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4371     for (i=0; i<a->B->cmap->n; i++) {
4372       colors[i] = coloring->colors[larray[i]];
4373     }
4374     ierr = PetscFree(larray);CHKERRQ(ierr);
4375     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4376     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4377     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4378   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4384 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4385 {
4386   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4387   PetscErrorCode ierr;
4388 
4389   PetscFunctionBegin;
4390   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4391   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4397 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4398 {
4399   PetscErrorCode ierr;
4400   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4401   PetscInt       *indx;
4402 
4403   PetscFunctionBegin;
4404   /* This routine will ONLY return MPIAIJ type matrix */
4405   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4406   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4407   if (n == PETSC_DECIDE) {
4408     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4409   }
4410   /* Check sum(n) = N */
4411   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4412   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4413 
4414   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4415   rstart -= m;
4416 
4417   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4418   for (i=0; i<m; i++) {
4419     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4420     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4421     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4422   }
4423 
4424   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4425   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4426   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4427   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4428   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4429   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4435 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4436 {
4437   PetscErrorCode ierr;
4438   PetscInt       m,N,i,rstart,nnz,Ii;
4439   PetscInt       *indx;
4440   PetscScalar    *values;
4441 
4442   PetscFunctionBegin;
4443   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4444   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4445   for (i=0; i<m; i++) {
4446     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4447     Ii   = i + rstart;
4448     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4449     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4450   }
4451   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453   PetscFunctionReturn(0);
4454 }
4455 
4456 #undef __FUNCT__
4457 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4458 /*@
4459       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4460                  matrices from each processor
4461 
4462     Collective on MPI_Comm
4463 
4464    Input Parameters:
4465 +    comm - the communicators the parallel matrix will live on
4466 .    inmat - the input sequential matrices
4467 .    n - number of local columns (or PETSC_DECIDE)
4468 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4469 
4470    Output Parameter:
4471 .    outmat - the parallel matrix generated
4472 
4473     Level: advanced
4474 
4475    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4476 
4477 @*/
4478 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4479 {
4480   PetscErrorCode ierr;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4484   if (scall == MAT_INITIAL_MATRIX) {
4485     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4486   }
4487   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4488   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 #undef __FUNCT__
4493 #define __FUNCT__ "MatFileSplit"
4494 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4495 {
4496   PetscErrorCode    ierr;
4497   PetscMPIInt       rank;
4498   PetscInt          m,N,i,rstart,nnz;
4499   size_t            len;
4500   const PetscInt    *indx;
4501   PetscViewer       out;
4502   char              *name;
4503   Mat               B;
4504   const PetscScalar *values;
4505 
4506   PetscFunctionBegin;
4507   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4508   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4509   /* Should this be the type of the diagonal block of A? */
4510   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4511   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4512   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4513   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4514   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4515   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4516   for (i=0; i<m; i++) {
4517     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4519     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4520   }
4521   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4522   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4523 
4524   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4525   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4526   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4527   sprintf(name,"%s.%d",outfile,rank);
4528   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4529   ierr = PetscFree(name);CHKERRQ(ierr);
4530   ierr = MatView(B,out);CHKERRQ(ierr);
4531   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4532   ierr = MatDestroy(&B);CHKERRQ(ierr);
4533   PetscFunctionReturn(0);
4534 }
4535 
4536 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4537 #undef __FUNCT__
4538 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4539 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4540 {
4541   PetscErrorCode      ierr;
4542   Mat_Merge_SeqsToMPI *merge;
4543   PetscContainer      container;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4547   if (container) {
4548     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4561     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4562     ierr = PetscFree(merge);CHKERRQ(ierr);
4563     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4564   }
4565   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #include <../src/mat/utils/freespace.h>
4570 #include <petscbt.h>
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4574 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4575 {
4576   PetscErrorCode      ierr;
4577   MPI_Comm            comm;
4578   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4579   PetscMPIInt         size,rank,taga,*len_s;
4580   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4581   PetscInt            proc,m;
4582   PetscInt            **buf_ri,**buf_rj;
4583   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4584   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4585   MPI_Request         *s_waits,*r_waits;
4586   MPI_Status          *status;
4587   MatScalar           *aa=a->a;
4588   MatScalar           **abuf_r,*ba_i;
4589   Mat_Merge_SeqsToMPI *merge;
4590   PetscContainer      container;
4591 
4592   PetscFunctionBegin;
4593   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4594   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4595 
4596   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4597   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4598 
4599   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4600   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4601 
4602   bi     = merge->bi;
4603   bj     = merge->bj;
4604   buf_ri = merge->buf_ri;
4605   buf_rj = merge->buf_rj;
4606 
4607   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4608   owners = merge->rowmap->range;
4609   len_s  = merge->len_s;
4610 
4611   /* send and recv matrix values */
4612   /*-----------------------------*/
4613   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4614   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4615 
4616   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4617   for (proc=0,k=0; proc<size; proc++) {
4618     if (!len_s[proc]) continue;
4619     i    = owners[proc];
4620     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4621     k++;
4622   }
4623 
4624   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4625   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4626   ierr = PetscFree(status);CHKERRQ(ierr);
4627 
4628   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4629   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4630 
4631   /* insert mat values of mpimat */
4632   /*----------------------------*/
4633   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4634   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4635 
4636   for (k=0; k<merge->nrecv; k++) {
4637     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4638     nrows       = *(buf_ri_k[k]);
4639     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4640     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4641   }
4642 
4643   /* set values of ba */
4644   m = merge->rowmap->n;
4645   for (i=0; i<m; i++) {
4646     arow = owners[rank] + i;
4647     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4648     bnzi = bi[i+1] - bi[i];
4649     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4650 
4651     /* add local non-zero vals of this proc's seqmat into ba */
4652     anzi   = ai[arow+1] - ai[arow];
4653     aj     = a->j + ai[arow];
4654     aa     = a->a + ai[arow];
4655     nextaj = 0;
4656     for (j=0; nextaj<anzi; j++) {
4657       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4658         ba_i[j] += aa[nextaj++];
4659       }
4660     }
4661 
4662     /* add received vals into ba */
4663     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4664       /* i-th row */
4665       if (i == *nextrow[k]) {
4666         anzi   = *(nextai[k]+1) - *nextai[k];
4667         aj     = buf_rj[k] + *(nextai[k]);
4668         aa     = abuf_r[k] + *(nextai[k]);
4669         nextaj = 0;
4670         for (j=0; nextaj<anzi; j++) {
4671           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4672             ba_i[j] += aa[nextaj++];
4673           }
4674         }
4675         nextrow[k]++; nextai[k]++;
4676       }
4677     }
4678     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4679   }
4680   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4681   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4682 
4683   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4684   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4685   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4686   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4687   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4688   PetscFunctionReturn(0);
4689 }
4690 
4691 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4692 
4693 #undef __FUNCT__
4694 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4695 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4696 {
4697   PetscErrorCode      ierr;
4698   Mat                 B_mpi;
4699   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4700   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4701   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4702   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4703   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4704   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4705   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4706   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4707   MPI_Status          *status;
4708   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4709   PetscBT             lnkbt;
4710   Mat_Merge_SeqsToMPI *merge;
4711   PetscContainer      container;
4712 
4713   PetscFunctionBegin;
4714   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4715 
4716   /* make sure it is a PETSc comm */
4717   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4718   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4720 
4721   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4722   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4723 
4724   /* determine row ownership */
4725   /*---------------------------------------------------------*/
4726   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4727   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4728   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4729   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4730   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4731   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4732   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4733 
4734   m      = merge->rowmap->n;
4735   owners = merge->rowmap->range;
4736 
4737   /* determine the number of messages to send, their lengths */
4738   /*---------------------------------------------------------*/
4739   len_s = merge->len_s;
4740 
4741   len          = 0; /* length of buf_si[] */
4742   merge->nsend = 0;
4743   for (proc=0; proc<size; proc++) {
4744     len_si[proc] = 0;
4745     if (proc == rank) {
4746       len_s[proc] = 0;
4747     } else {
4748       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4749       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4750     }
4751     if (len_s[proc]) {
4752       merge->nsend++;
4753       nrows = 0;
4754       for (i=owners[proc]; i<owners[proc+1]; i++) {
4755         if (ai[i+1] > ai[i]) nrows++;
4756       }
4757       len_si[proc] = 2*(nrows+1);
4758       len         += len_si[proc];
4759     }
4760   }
4761 
4762   /* determine the number and length of messages to receive for ij-structure */
4763   /*-------------------------------------------------------------------------*/
4764   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4765   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4766 
4767   /* post the Irecv of j-structure */
4768   /*-------------------------------*/
4769   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4770   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4771 
4772   /* post the Isend of j-structure */
4773   /*--------------------------------*/
4774   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4775 
4776   for (proc=0, k=0; proc<size; proc++) {
4777     if (!len_s[proc]) continue;
4778     i    = owners[proc];
4779     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4780     k++;
4781   }
4782 
4783   /* receives and sends of j-structure are complete */
4784   /*------------------------------------------------*/
4785   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4786   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4787 
4788   /* send and recv i-structure */
4789   /*---------------------------*/
4790   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4791   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4792 
4793   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4794   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4795   for (proc=0,k=0; proc<size; proc++) {
4796     if (!len_s[proc]) continue;
4797     /* form outgoing message for i-structure:
4798          buf_si[0]:                 nrows to be sent
4799                [1:nrows]:           row index (global)
4800                [nrows+1:2*nrows+1]: i-structure index
4801     */
4802     /*-------------------------------------------*/
4803     nrows       = len_si[proc]/2 - 1;
4804     buf_si_i    = buf_si + nrows+1;
4805     buf_si[0]   = nrows;
4806     buf_si_i[0] = 0;
4807     nrows       = 0;
4808     for (i=owners[proc]; i<owners[proc+1]; i++) {
4809       anzi = ai[i+1] - ai[i];
4810       if (anzi) {
4811         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4812         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4813         nrows++;
4814       }
4815     }
4816     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4817     k++;
4818     buf_si += len_si[proc];
4819   }
4820 
4821   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4822   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4823 
4824   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4825   for (i=0; i<merge->nrecv; i++) {
4826     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4827   }
4828 
4829   ierr = PetscFree(len_si);CHKERRQ(ierr);
4830   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4831   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4832   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4833   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4834   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4835   ierr = PetscFree(status);CHKERRQ(ierr);
4836 
4837   /* compute a local seq matrix in each processor */
4838   /*----------------------------------------------*/
4839   /* allocate bi array and free space for accumulating nonzero column info */
4840   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4841   bi[0] = 0;
4842 
4843   /* create and initialize a linked list */
4844   nlnk = N+1;
4845   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4846 
4847   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4848   len  = ai[owners[rank+1]] - ai[owners[rank]];
4849   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4850 
4851   current_space = free_space;
4852 
4853   /* determine symbolic info for each local row */
4854   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4855 
4856   for (k=0; k<merge->nrecv; k++) {
4857     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4858     nrows       = *buf_ri_k[k];
4859     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4860     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4861   }
4862 
4863   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4864   len  = 0;
4865   for (i=0; i<m; i++) {
4866     bnzi = 0;
4867     /* add local non-zero cols of this proc's seqmat into lnk */
4868     arow  = owners[rank] + i;
4869     anzi  = ai[arow+1] - ai[arow];
4870     aj    = a->j + ai[arow];
4871     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4872     bnzi += nlnk;
4873     /* add received col data into lnk */
4874     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4875       if (i == *nextrow[k]) { /* i-th row */
4876         anzi  = *(nextai[k]+1) - *nextai[k];
4877         aj    = buf_rj[k] + *nextai[k];
4878         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4879         bnzi += nlnk;
4880         nextrow[k]++; nextai[k]++;
4881       }
4882     }
4883     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4884 
4885     /* if free space is not available, make more free space */
4886     if (current_space->local_remaining<bnzi) {
4887       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4888       nspacedouble++;
4889     }
4890     /* copy data into free space, then initialize lnk */
4891     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4892     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4893 
4894     current_space->array           += bnzi;
4895     current_space->local_used      += bnzi;
4896     current_space->local_remaining -= bnzi;
4897 
4898     bi[i+1] = bi[i] + bnzi;
4899   }
4900 
4901   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4902 
4903   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4904   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4905   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4906 
4907   /* create symbolic parallel matrix B_mpi */
4908   /*---------------------------------------*/
4909   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4910   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4911   if (n==PETSC_DECIDE) {
4912     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4913   } else {
4914     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4915   }
4916   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4917   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4918   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4919   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4920   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4921 
4922   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4923   B_mpi->assembled    = PETSC_FALSE;
4924   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4925   merge->bi           = bi;
4926   merge->bj           = bj;
4927   merge->buf_ri       = buf_ri;
4928   merge->buf_rj       = buf_rj;
4929   merge->coi          = NULL;
4930   merge->coj          = NULL;
4931   merge->owners_co    = NULL;
4932 
4933   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4934 
4935   /* attach the supporting struct to B_mpi for reuse */
4936   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4937   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4938   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4939   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4940   *mpimat = B_mpi;
4941 
4942   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4948 /*@C
4949       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4950                  matrices from each processor
4951 
4952     Collective on MPI_Comm
4953 
4954    Input Parameters:
4955 +    comm - the communicators the parallel matrix will live on
4956 .    seqmat - the input sequential matrices
4957 .    m - number of local rows (or PETSC_DECIDE)
4958 .    n - number of local columns (or PETSC_DECIDE)
4959 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4960 
4961    Output Parameter:
4962 .    mpimat - the parallel matrix generated
4963 
4964     Level: advanced
4965 
4966    Notes:
4967      The dimensions of the sequential matrix in each processor MUST be the same.
4968      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4969      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4970 @*/
4971 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4972 {
4973   PetscErrorCode ierr;
4974   PetscMPIInt    size;
4975 
4976   PetscFunctionBegin;
4977   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4978   if (size == 1) {
4979     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4980     if (scall == MAT_INITIAL_MATRIX) {
4981       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4982     } else {
4983       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4984     }
4985     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4986     PetscFunctionReturn(0);
4987   }
4988   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4989   if (scall == MAT_INITIAL_MATRIX) {
4990     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4991   }
4992   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4993   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
4996 
4997 #undef __FUNCT__
4998 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4999 /*@
5000      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5001           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5002           with MatGetSize()
5003 
5004     Not Collective
5005 
5006    Input Parameters:
5007 +    A - the matrix
5008 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5009 
5010    Output Parameter:
5011 .    A_loc - the local sequential matrix generated
5012 
5013     Level: developer
5014 
5015 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5016 
5017 @*/
5018 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5019 {
5020   PetscErrorCode ierr;
5021   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5022   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5023   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5024   MatScalar      *aa=a->a,*ba=b->a,*cam;
5025   PetscScalar    *ca;
5026   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5027   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5028   PetscBool      match;
5029 
5030   PetscFunctionBegin;
5031   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5032   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5033   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5034   if (scall == MAT_INITIAL_MATRIX) {
5035     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5036     ci[0] = 0;
5037     for (i=0; i<am; i++) {
5038       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5039     }
5040     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5041     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5042     k    = 0;
5043     for (i=0; i<am; i++) {
5044       ncols_o = bi[i+1] - bi[i];
5045       ncols_d = ai[i+1] - ai[i];
5046       /* off-diagonal portion of A */
5047       for (jo=0; jo<ncols_o; jo++) {
5048         col = cmap[*bj];
5049         if (col >= cstart) break;
5050         cj[k]   = col; bj++;
5051         ca[k++] = *ba++;
5052       }
5053       /* diagonal portion of A */
5054       for (j=0; j<ncols_d; j++) {
5055         cj[k]   = cstart + *aj++;
5056         ca[k++] = *aa++;
5057       }
5058       /* off-diagonal portion of A */
5059       for (j=jo; j<ncols_o; j++) {
5060         cj[k]   = cmap[*bj++];
5061         ca[k++] = *ba++;
5062       }
5063     }
5064     /* put together the new matrix */
5065     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5066     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5067     /* Since these are PETSc arrays, change flags to free them as necessary. */
5068     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5069     mat->free_a  = PETSC_TRUE;
5070     mat->free_ij = PETSC_TRUE;
5071     mat->nonew   = 0;
5072   } else if (scall == MAT_REUSE_MATRIX) {
5073     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5074     ci = mat->i; cj = mat->j; cam = mat->a;
5075     for (i=0; i<am; i++) {
5076       /* off-diagonal portion of A */
5077       ncols_o = bi[i+1] - bi[i];
5078       for (jo=0; jo<ncols_o; jo++) {
5079         col = cmap[*bj];
5080         if (col >= cstart) break;
5081         *cam++ = *ba++; bj++;
5082       }
5083       /* diagonal portion of A */
5084       ncols_d = ai[i+1] - ai[i];
5085       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5086       /* off-diagonal portion of A */
5087       for (j=jo; j<ncols_o; j++) {
5088         *cam++ = *ba++; bj++;
5089       }
5090     }
5091   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5092   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 #undef __FUNCT__
5097 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5098 /*@C
5099      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5100 
5101     Not Collective
5102 
5103    Input Parameters:
5104 +    A - the matrix
5105 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5106 -    row, col - index sets of rows and columns to extract (or NULL)
5107 
5108    Output Parameter:
5109 .    A_loc - the local sequential matrix generated
5110 
5111     Level: developer
5112 
5113 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5114 
5115 @*/
5116 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5117 {
5118   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5119   PetscErrorCode ierr;
5120   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5121   IS             isrowa,iscola;
5122   Mat            *aloc;
5123   PetscBool      match;
5124 
5125   PetscFunctionBegin;
5126   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5127   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5128   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5129   if (!row) {
5130     start = A->rmap->rstart; end = A->rmap->rend;
5131     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5132   } else {
5133     isrowa = *row;
5134   }
5135   if (!col) {
5136     start = A->cmap->rstart;
5137     cmap  = a->garray;
5138     nzA   = a->A->cmap->n;
5139     nzB   = a->B->cmap->n;
5140     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5141     ncols = 0;
5142     for (i=0; i<nzB; i++) {
5143       if (cmap[i] < start) idx[ncols++] = cmap[i];
5144       else break;
5145     }
5146     imark = i;
5147     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5148     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5149     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5150   } else {
5151     iscola = *col;
5152   }
5153   if (scall != MAT_INITIAL_MATRIX) {
5154     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5155     aloc[0] = *A_loc;
5156   }
5157   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5158   *A_loc = aloc[0];
5159   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5160   if (!row) {
5161     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5162   }
5163   if (!col) {
5164     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5165   }
5166   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5167   PetscFunctionReturn(0);
5168 }
5169 
5170 #undef __FUNCT__
5171 #define __FUNCT__ "MatGetBrowsOfAcols"
5172 /*@C
5173     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5174 
5175     Collective on Mat
5176 
5177    Input Parameters:
5178 +    A,B - the matrices in mpiaij format
5179 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5180 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5181 
5182    Output Parameter:
5183 +    rowb, colb - index sets of rows and columns of B to extract
5184 -    B_seq - the sequential matrix generated
5185 
5186     Level: developer
5187 
5188 @*/
5189 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5190 {
5191   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5192   PetscErrorCode ierr;
5193   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5194   IS             isrowb,iscolb;
5195   Mat            *bseq=NULL;
5196 
5197   PetscFunctionBegin;
5198   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5199     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5200   }
5201   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5202 
5203   if (scall == MAT_INITIAL_MATRIX) {
5204     start = A->cmap->rstart;
5205     cmap  = a->garray;
5206     nzA   = a->A->cmap->n;
5207     nzB   = a->B->cmap->n;
5208     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5209     ncols = 0;
5210     for (i=0; i<nzB; i++) {  /* row < local row index */
5211       if (cmap[i] < start) idx[ncols++] = cmap[i];
5212       else break;
5213     }
5214     imark = i;
5215     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5216     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5217     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5218     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5219   } else {
5220     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5221     isrowb  = *rowb; iscolb = *colb;
5222     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5223     bseq[0] = *B_seq;
5224   }
5225   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5226   *B_seq = bseq[0];
5227   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5228   if (!rowb) {
5229     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5230   } else {
5231     *rowb = isrowb;
5232   }
5233   if (!colb) {
5234     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5235   } else {
5236     *colb = iscolb;
5237   }
5238   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5239   PetscFunctionReturn(0);
5240 }
5241 
5242 #undef __FUNCT__
5243 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5244 /*
5245     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5246     of the OFF-DIAGONAL portion of local A
5247 
5248     Collective on Mat
5249 
5250    Input Parameters:
5251 +    A,B - the matrices in mpiaij format
5252 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5253 
5254    Output Parameter:
5255 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5256 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5257 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5258 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5259 
5260     Level: developer
5261 
5262 */
5263 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5264 {
5265   VecScatter_MPI_General *gen_to,*gen_from;
5266   PetscErrorCode         ierr;
5267   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5268   Mat_SeqAIJ             *b_oth;
5269   VecScatter             ctx =a->Mvctx;
5270   MPI_Comm               comm;
5271   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5272   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5273   PetscScalar            *rvalues,*svalues;
5274   MatScalar              *b_otha,*bufa,*bufA;
5275   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5276   MPI_Request            *rwaits = NULL,*swaits = NULL;
5277   MPI_Status             *sstatus,rstatus;
5278   PetscMPIInt            jj;
5279   PetscInt               *cols,sbs,rbs;
5280   PetscScalar            *vals;
5281 
5282   PetscFunctionBegin;
5283   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5284   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5285     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5286   }
5287   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5288   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5289 
5290   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5291   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5292   rvalues  = gen_from->values; /* holds the length of receiving row */
5293   svalues  = gen_to->values;   /* holds the length of sending row */
5294   nrecvs   = gen_from->n;
5295   nsends   = gen_to->n;
5296 
5297   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5298   srow    = gen_to->indices;    /* local row index to be sent */
5299   sstarts = gen_to->starts;
5300   sprocs  = gen_to->procs;
5301   sstatus = gen_to->sstatus;
5302   sbs     = gen_to->bs;
5303   rstarts = gen_from->starts;
5304   rprocs  = gen_from->procs;
5305   rbs     = gen_from->bs;
5306 
5307   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5308   if (scall == MAT_INITIAL_MATRIX) {
5309     /* i-array */
5310     /*---------*/
5311     /*  post receives */
5312     for (i=0; i<nrecvs; i++) {
5313       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5314       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5315       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* pack the outgoing message */
5319     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5320 
5321     sstartsj[0] = 0;
5322     rstartsj[0] = 0;
5323     len         = 0; /* total length of j or a array to be sent */
5324     k           = 0;
5325     for (i=0; i<nsends; i++) {
5326       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5327       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5328       for (j=0; j<nrows; j++) {
5329         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5330         for (l=0; l<sbs; l++) {
5331           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5332 
5333           rowlen[j*sbs+l] = ncols;
5334 
5335           len += ncols;
5336           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5337         }
5338         k++;
5339       }
5340       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5341 
5342       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5343     }
5344     /* recvs and sends of i-array are completed */
5345     i = nrecvs;
5346     while (i--) {
5347       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5348     }
5349     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5350 
5351     /* allocate buffers for sending j and a arrays */
5352     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5353     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5354 
5355     /* create i-array of B_oth */
5356     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5357 
5358     b_othi[0] = 0;
5359     len       = 0; /* total length of j or a array to be received */
5360     k         = 0;
5361     for (i=0; i<nrecvs; i++) {
5362       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5363       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5364       for (j=0; j<nrows; j++) {
5365         b_othi[k+1] = b_othi[k] + rowlen[j];
5366         len        += rowlen[j]; k++;
5367       }
5368       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5369     }
5370 
5371     /* allocate space for j and a arrrays of B_oth */
5372     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5373     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5374 
5375     /* j-array */
5376     /*---------*/
5377     /*  post receives of j-array */
5378     for (i=0; i<nrecvs; i++) {
5379       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5380       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5381     }
5382 
5383     /* pack the outgoing message j-array */
5384     k = 0;
5385     for (i=0; i<nsends; i++) {
5386       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5387       bufJ  = bufj+sstartsj[i];
5388       for (j=0; j<nrows; j++) {
5389         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5390         for (ll=0; ll<sbs; ll++) {
5391           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5392           for (l=0; l<ncols; l++) {
5393             *bufJ++ = cols[l];
5394           }
5395           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5396         }
5397       }
5398       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5399     }
5400 
5401     /* recvs and sends of j-array are completed */
5402     i = nrecvs;
5403     while (i--) {
5404       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5405     }
5406     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5407   } else if (scall == MAT_REUSE_MATRIX) {
5408     sstartsj = *startsj_s;
5409     rstartsj = *startsj_r;
5410     bufa     = *bufa_ptr;
5411     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5412     b_otha   = b_oth->a;
5413   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5414 
5415   /* a-array */
5416   /*---------*/
5417   /*  post receives of a-array */
5418   for (i=0; i<nrecvs; i++) {
5419     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5420     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5421   }
5422 
5423   /* pack the outgoing message a-array */
5424   k = 0;
5425   for (i=0; i<nsends; i++) {
5426     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5427     bufA  = bufa+sstartsj[i];
5428     for (j=0; j<nrows; j++) {
5429       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5430       for (ll=0; ll<sbs; ll++) {
5431         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5432         for (l=0; l<ncols; l++) {
5433           *bufA++ = vals[l];
5434         }
5435         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5436       }
5437     }
5438     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5439   }
5440   /* recvs and sends of a-array are completed */
5441   i = nrecvs;
5442   while (i--) {
5443     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5444   }
5445   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5446   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5447 
5448   if (scall == MAT_INITIAL_MATRIX) {
5449     /* put together the new matrix */
5450     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5451 
5452     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5453     /* Since these are PETSc arrays, change flags to free them as necessary. */
5454     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5455     b_oth->free_a  = PETSC_TRUE;
5456     b_oth->free_ij = PETSC_TRUE;
5457     b_oth->nonew   = 0;
5458 
5459     ierr = PetscFree(bufj);CHKERRQ(ierr);
5460     if (!startsj_s || !bufa_ptr) {
5461       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5462       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5463     } else {
5464       *startsj_s = sstartsj;
5465       *startsj_r = rstartsj;
5466       *bufa_ptr  = bufa;
5467     }
5468   }
5469   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5470   PetscFunctionReturn(0);
5471 }
5472 
5473 #undef __FUNCT__
5474 #define __FUNCT__ "MatGetCommunicationStructs"
5475 /*@C
5476   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5477 
5478   Not Collective
5479 
5480   Input Parameters:
5481 . A - The matrix in mpiaij format
5482 
5483   Output Parameter:
5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5485 . colmap - A map from global column index to local index into lvec
5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5487 
5488   Level: developer
5489 
5490 @*/
5491 #if defined(PETSC_USE_CTABLE)
5492 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5493 #else
5494 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5495 #endif
5496 {
5497   Mat_MPIAIJ *a;
5498 
5499   PetscFunctionBegin;
5500   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5501   PetscValidPointer(lvec, 2);
5502   PetscValidPointer(colmap, 3);
5503   PetscValidPointer(multScatter, 4);
5504   a = (Mat_MPIAIJ*) A->data;
5505   if (lvec) *lvec = a->lvec;
5506   if (colmap) *colmap = a->colmap;
5507   if (multScatter) *multScatter = a->Mvctx;
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5512 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5513 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5514 
5515 #undef __FUNCT__
5516 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5517 /*
5518     Computes (B'*A')' since computing B*A directly is untenable
5519 
5520                n                       p                          p
5521         (              )       (              )         (                  )
5522       m (      A       )  *  n (       B      )   =   m (         C        )
5523         (              )       (              )         (                  )
5524 
5525 */
5526 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5527 {
5528   PetscErrorCode ierr;
5529   Mat            At,Bt,Ct;
5530 
5531   PetscFunctionBegin;
5532   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5533   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5534   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5535   ierr = MatDestroy(&At);CHKERRQ(ierr);
5536   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5537   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5538   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5539   PetscFunctionReturn(0);
5540 }
5541 
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5544 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5545 {
5546   PetscErrorCode ierr;
5547   PetscInt       m=A->rmap->n,n=B->cmap->n;
5548   Mat            Cmat;
5549 
5550   PetscFunctionBegin;
5551   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5552   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5553   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5554   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5555   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5556   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5557   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5558   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5559 
5560   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5561 
5562   *C = Cmat;
5563   PetscFunctionReturn(0);
5564 }
5565 
5566 /* ----------------------------------------------------------------*/
5567 #undef __FUNCT__
5568 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5569 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5570 {
5571   PetscErrorCode ierr;
5572 
5573   PetscFunctionBegin;
5574   if (scall == MAT_INITIAL_MATRIX) {
5575     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5576     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5577     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5578   }
5579   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5580   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5581   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5582   PetscFunctionReturn(0);
5583 }
5584 
5585 #if defined(PETSC_HAVE_MUMPS)
5586 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5587 #endif
5588 #if defined(PETSC_HAVE_PASTIX)
5589 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5590 #endif
5591 #if defined(PETSC_HAVE_SUPERLU_DIST)
5592 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5593 #endif
5594 #if defined(PETSC_HAVE_CLIQUE)
5595 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5596 #endif
5597 
5598 /*MC
5599    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5600 
5601    Options Database Keys:
5602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5603 
5604   Level: beginner
5605 
5606 .seealso: MatCreateAIJ()
5607 M*/
5608 
5609 #undef __FUNCT__
5610 #define __FUNCT__ "MatCreate_MPIAIJ"
5611 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5612 {
5613   Mat_MPIAIJ     *b;
5614   PetscErrorCode ierr;
5615   PetscMPIInt    size;
5616 
5617   PetscFunctionBegin;
5618   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5619 
5620   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5621   B->data       = (void*)b;
5622   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5623   B->assembled  = PETSC_FALSE;
5624   B->insertmode = NOT_SET_VALUES;
5625   b->size       = size;
5626 
5627   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5628 
5629   /* build cache for off array entries formed */
5630   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5631 
5632   b->donotstash  = PETSC_FALSE;
5633   b->colmap      = 0;
5634   b->garray      = 0;
5635   b->roworiented = PETSC_TRUE;
5636 
5637   /* stuff used for matrix vector multiply */
5638   b->lvec  = NULL;
5639   b->Mvctx = NULL;
5640 
5641   /* stuff for MatGetRow() */
5642   b->rowindices   = 0;
5643   b->rowvalues    = 0;
5644   b->getrowactive = PETSC_FALSE;
5645 
5646   /* flexible pointer used in CUSP/CUSPARSE classes */
5647   b->spptr = NULL;
5648 
5649 #if defined(PETSC_HAVE_MUMPS)
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5651 #endif
5652 #if defined(PETSC_HAVE_PASTIX)
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5654 #endif
5655 #if defined(PETSC_HAVE_SUPERLU_DIST)
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5657 #endif
5658 #if defined(PETSC_HAVE_CLIQUE)
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5660 #endif
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5675   PetscFunctionReturn(0);
5676 }
5677 
5678 #undef __FUNCT__
5679 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5680 /*@
5681      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5682          and "off-diagonal" part of the matrix in CSR format.
5683 
5684    Collective on MPI_Comm
5685 
5686    Input Parameters:
5687 +  comm - MPI communicator
5688 .  m - number of local rows (Cannot be PETSC_DECIDE)
5689 .  n - This value should be the same as the local size used in creating the
5690        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5691        calculated if N is given) For square matrices n is almost always m.
5692 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5693 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5694 .   i - row indices for "diagonal" portion of matrix
5695 .   j - column indices
5696 .   a - matrix values
5697 .   oi - row indices for "off-diagonal" portion of matrix
5698 .   oj - column indices
5699 -   oa - matrix values
5700 
5701    Output Parameter:
5702 .   mat - the matrix
5703 
5704    Level: advanced
5705 
5706    Notes:
5707        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5708        must free the arrays once the matrix has been destroyed and not before.
5709 
5710        The i and j indices are 0 based
5711 
5712        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5713 
5714        This sets local rows and cannot be used to set off-processor values.
5715 
5716        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5717        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5718        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5719        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5720        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5721        communication if it is known that only local entries will be set.
5722 
5723 .keywords: matrix, aij, compressed row, sparse, parallel
5724 
5725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5726           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5727 @*/
5728 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5729 {
5730   PetscErrorCode ierr;
5731   Mat_MPIAIJ     *maij;
5732 
5733   PetscFunctionBegin;
5734   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5735   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5736   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5737   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5738   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5739   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5740   maij = (Mat_MPIAIJ*) (*mat)->data;
5741 
5742   (*mat)->preallocated = PETSC_TRUE;
5743 
5744   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5745   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5746 
5747   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5748   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5749 
5750   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5754 
5755   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5757   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5758   PetscFunctionReturn(0);
5759 }
5760 
5761 /*
5762     Special version for direct calls from Fortran
5763 */
5764 #include <petsc-private/fortranimpl.h>
5765 
5766 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5767 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5768 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5769 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5770 #endif
5771 
5772 /* Change these macros so can be used in void function */
5773 #undef CHKERRQ
5774 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5775 #undef SETERRQ2
5776 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5777 #undef SETERRQ3
5778 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5779 #undef SETERRQ
5780 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5781 
5782 #undef __FUNCT__
5783 #define __FUNCT__ "matsetvaluesmpiaij_"
5784 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5785 {
5786   Mat            mat  = *mmat;
5787   PetscInt       m    = *mm, n = *mn;
5788   InsertMode     addv = *maddv;
5789   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5790   PetscScalar    value;
5791   PetscErrorCode ierr;
5792 
5793   MatCheckPreallocated(mat,1);
5794   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5795 
5796 #if defined(PETSC_USE_DEBUG)
5797   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5798 #endif
5799   {
5800     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5801     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5802     PetscBool roworiented = aij->roworiented;
5803 
5804     /* Some Variables required in the macro */
5805     Mat        A                 = aij->A;
5806     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5807     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5808     MatScalar  *aa               = a->a;
5809     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5810     Mat        B                 = aij->B;
5811     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5812     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5813     MatScalar  *ba               = b->a;
5814 
5815     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5816     PetscInt  nonew = a->nonew;
5817     MatScalar *ap1,*ap2;
5818 
5819     PetscFunctionBegin;
5820     for (i=0; i<m; i++) {
5821       if (im[i] < 0) continue;
5822 #if defined(PETSC_USE_DEBUG)
5823       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5824 #endif
5825       if (im[i] >= rstart && im[i] < rend) {
5826         row      = im[i] - rstart;
5827         lastcol1 = -1;
5828         rp1      = aj + ai[row];
5829         ap1      = aa + ai[row];
5830         rmax1    = aimax[row];
5831         nrow1    = ailen[row];
5832         low1     = 0;
5833         high1    = nrow1;
5834         lastcol2 = -1;
5835         rp2      = bj + bi[row];
5836         ap2      = ba + bi[row];
5837         rmax2    = bimax[row];
5838         nrow2    = bilen[row];
5839         low2     = 0;
5840         high2    = nrow2;
5841 
5842         for (j=0; j<n; j++) {
5843           if (roworiented) value = v[i*n+j];
5844           else value = v[i+j*m];
5845           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5846           if (in[j] >= cstart && in[j] < cend) {
5847             col = in[j] - cstart;
5848             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5849           } else if (in[j] < 0) continue;
5850 #if defined(PETSC_USE_DEBUG)
5851           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5852 #endif
5853           else {
5854             if (mat->was_assembled) {
5855               if (!aij->colmap) {
5856                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5857               }
5858 #if defined(PETSC_USE_CTABLE)
5859               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5860               col--;
5861 #else
5862               col = aij->colmap[in[j]] - 1;
5863 #endif
5864               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5865                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5866                 col  =  in[j];
5867                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5868                 B     = aij->B;
5869                 b     = (Mat_SeqAIJ*)B->data;
5870                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5871                 rp2   = bj + bi[row];
5872                 ap2   = ba + bi[row];
5873                 rmax2 = bimax[row];
5874                 nrow2 = bilen[row];
5875                 low2  = 0;
5876                 high2 = nrow2;
5877                 bm    = aij->B->rmap->n;
5878                 ba    = b->a;
5879               }
5880             } else col = in[j];
5881             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5882           }
5883         }
5884       } else if (!aij->donotstash) {
5885         if (roworiented) {
5886           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5887         } else {
5888           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5889         }
5890       }
5891     }
5892   }
5893   PetscFunctionReturnVoid();
5894 }
5895 
5896