xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4388c78fe623f90da5f9f3706ea20bd244c294a1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     if (redund->matseq) { /* '-new' option */
2530       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2531       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2532       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2533       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2534     } else {
2535       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2537       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2538       for (i=0; i<redund->nrecvs; i++) {
2539         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2540         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2541       }
2542       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2543 
2544       if (redund->psubcomm) {
2545         ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2546       }
2547     }
2548 
2549     ierr = redund->Destroy(A);CHKERRQ(ierr);
2550     ierr = PetscFree(redund);CHKERRQ(ierr);
2551   }
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2557 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2558 {
2559   PetscMPIInt    rank,size;
2560   MPI_Comm       comm,subcomm=psubcomm->comm;
2561   PetscErrorCode ierr;
2562   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2563   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2564   PetscInt       *rowrange = mat->rmap->range;
2565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2566   Mat            A = aij->A,B=aij->B,C=*matredundant;
2567   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2568   PetscScalar    *sbuf_a;
2569   PetscInt       nzlocal=a->nz+b->nz;
2570   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2571   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2572   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2573   MatScalar      *aworkA,*aworkB;
2574   PetscScalar    *vals;
2575   PetscMPIInt    tag1,tag2,tag3,imdex;
2576   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2577   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2578   MPI_Status     recv_status,*send_status;
2579   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2580   PetscInt       **rbuf_j=NULL;
2581   PetscScalar    **rbuf_a=NULL;
2582   Mat_Redundant  *redund =NULL;
2583   PetscBool      flg=PETSC_FALSE;
2584 
2585   PetscFunctionBegin;
2586   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2589   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2590   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2591 
2592   /* ---------- new imples: use MatGetSubMatrices() ------------*/
2593   ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2594   if (flg) {
2595     Mat        *matseq;
2596     IS         isrow,iscol;
2597     PetscInt   mloc_sub,rstart,rend;
2598 
2599     if (reuse == MAT_INITIAL_MATRIX) {
2600       /* create a local sequential matrix matseq[0] */
2601       ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2602       ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2603       ierr = MatSetUp(C);CHKERRQ(ierr);
2604       ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2605       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2606       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2607 
2608       ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
2609       ierr = MatDestroy(&C);CHKERRQ(ierr);
2610       mloc_sub = rend - rstart;
2611 
2612       /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */
2613       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2614       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2615     } else { /* reuse == MAT_REUSE_MATRIX */
2616       if (subsize == 1) {
2617         Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2618         redund = c->redundant;
2619       } else {
2620         Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2621         redund = c->redundant;
2622       }
2623 
2624       isrow = redund->isrow;
2625       iscol = redund->iscol;
2626       matseq = redund->matseq;
2627     }
2628 
2629     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2630     /*
2631     if (rank==0) {
2632       printf("[%d] matsub:\n",rank);
2633       ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2634     }
2635     ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2636      */
2637 
2638     /* Create matredundant by concatenating matseq[0] from processors in this subcomm */
2639     /*
2640     if (reuse == MAT_REUSE_MATRIX) {
2641       if (!rank) printf("matredundant:\n");
2642       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2643     }
2644      */
2645     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2646     /*
2647     if (nsubcomm == 1) {
2648       if (!rank) printf( "matredundant\n");
2649       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2650      } */
2651 
2652     if (reuse == MAT_INITIAL_MATRIX) {
2653       /* create a supporting struct and attach it to C for reuse */
2654       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2655       if (subsize == 1) {
2656         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2657         c->redundant = redund;
2658       } else {
2659         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2660         c->redundant = redund;
2661       }
2662 
2663       redund->isrow     = isrow;
2664       redund->iscol     = iscol;
2665       redund->matseq    = matseq;
2666       redund->psubcomm  = NULL;
2667 
2668       redund->Destroy               = (*matredundant)->ops->destroy;
2669       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2670     }
2671     PetscFunctionReturn(0);
2672   }
2673   /* ----------------------------------------------------*/
2674 
2675   if (reuse == MAT_REUSE_MATRIX) {
2676     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2677     if (subsize == 1) {
2678       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2679       redund = c->redundant;
2680     } else {
2681       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2682       redund = c->redundant;
2683     }
2684     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2685 
2686     nsends    = redund->nsends;
2687     nrecvs    = redund->nrecvs;
2688     send_rank = redund->send_rank;
2689     recv_rank = redund->recv_rank;
2690     sbuf_nz   = redund->sbuf_nz;
2691     rbuf_nz   = redund->rbuf_nz;
2692     sbuf_j    = redund->sbuf_j;
2693     sbuf_a    = redund->sbuf_a;
2694     rbuf_j    = redund->rbuf_j;
2695     rbuf_a    = redund->rbuf_a;
2696   }
2697 
2698   if (reuse == MAT_INITIAL_MATRIX) {
2699     PetscInt    nleftover,np_subcomm;
2700 
2701     /* get the destination processors' id send_rank, nsends and nrecvs */
2702     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2703 
2704     np_subcomm = size/nsubcomm;
2705     nleftover  = size - nsubcomm*np_subcomm;
2706 
2707     nsends = 0; nrecvs = 0;
2708     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2709       /* -------------------------------------------*/
2710       for (i=0; i<size; i++) {
2711         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2712           send_rank[nsends]   = i; nsends++;
2713           recv_rank[nrecvs++] = i;
2714           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2715         }
2716       }
2717       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2718         i = size-nleftover-1;
2719         j = 0;
2720         while (j < nsubcomm - nleftover) {
2721           send_rank[nsends++] = i;
2722           i--; j++;
2723           /* printf("[%d] send to [%d]\n",rank,i); */
2724         }
2725       }
2726 
2727       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2728         for (i=0; i<nleftover; i++) {
2729           recv_rank[nrecvs++] = size-nleftover+i;
2730           /* printf("[%d] recv from [%d]\n",rank,i); */
2731         }
2732       }
2733     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2734       /* --------------------------------------------------*/
2735       /* ---------- new imples: use MatGetSubMatrices() ------------*/
2736       PetscBool flg=PETSC_FALSE;
2737       ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2738       if (flg) {
2739         Mat        *matseq;
2740         IS         isrow,iscol;
2741         PetscInt   mloc_sub,rstart,rend;
2742 
2743         /* create redundant matrix */
2744         ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2745         ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2746         ierr = MatSetUp(C);CHKERRQ(ierr);
2747         ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2748         ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2749         ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750 
2751         ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
2752         mloc_sub = rend - rstart;
2753 
2754         printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub);
2755         ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2756         ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2757 
2758         ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&matseq);CHKERRQ(ierr);
2759         if (rank==1) {
2760           printf("[%d] matsub:\n",rank);
2761           ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2762         }
2763 
2764         /* Create redundant matrix by concatenating local sequential
2765                  matrices from processors in this subcomm */
2766         ierr = MatDestroy(&C);CHKERRQ(ierr);
2767 
2768         ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
2769         *matredundant = C;
2770 
2771         if (nsubcomm == 1) {
2772           if (!rank) printf( "C\n");
2773           ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2774         }
2775 
2776         ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr);
2777         ierr = PetscFree(matseq);CHKERRQ(ierr);
2778         ierr = ISDestroy(&isrow);CHKERRQ(ierr);
2779         ierr = ISDestroy(&iscol);CHKERRQ(ierr);
2780         //ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2781         //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No done yet");
2782         PetscFunctionReturn(0);
2783       }
2784 
2785       /*---------------------------------- */
2786       PetscInt color,subcommstart;
2787       subcommstart=0;
2788       for (color=0; color<nsubcomm; color++) {
2789         if (psubcomm->color != color) {
2790           for (i=0; i<psubcomm->subsize[color]; i++) {
2791             if (subrank == i) { /* my_subrank == other's subrank */
2792               send_rank[nsends++] = subcommstart+i;
2793               recv_rank[nrecvs++] = subcommstart+i;
2794               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2795             }
2796           }
2797         }
2798         subcommstart += psubcomm->subsize[color];
2799       }
2800       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2801         subcommstart=0;
2802         for (color=0; color<nsubcomm; color++) {
2803           subcommstart += psubcomm->subsize[color];
2804           if (psubcomm->color == color) continue;
2805           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2806             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2807             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2808           }
2809         }
2810       }
2811 
2812       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2813         subcommstart=0;
2814         for (color=0; color<nsubcomm; color++) {
2815           subcommstart += psubcomm->subsize[color];
2816           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2817             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2818             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2819           }
2820         }
2821       }
2822     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2823 
2824     /* allocate sbuf_j, sbuf_a */
2825     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2826     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2827     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2828     /*
2829     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2830     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2831      */
2832   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2833 
2834   /* copy mat's local entries into the buffers */
2835   if (reuse == MAT_INITIAL_MATRIX) {
2836     rownz_max = 0;
2837     rptr      = sbuf_j;
2838     cols      = sbuf_j + rend-rstart + 1;
2839     vals      = sbuf_a;
2840     rptr[0]   = 0;
2841     for (i=0; i<rend-rstart; i++) {
2842       row    = i + rstart;
2843       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2844       ncols  = nzA + nzB;
2845       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2846       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2847       /* load the column indices for this row into cols */
2848       lwrite = 0;
2849       for (l=0; l<nzB; l++) {
2850         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2851           vals[lwrite]   = aworkB[l];
2852           cols[lwrite++] = ctmp;
2853         }
2854       }
2855       for (l=0; l<nzA; l++) {
2856         vals[lwrite]   = aworkA[l];
2857         cols[lwrite++] = cstart + cworkA[l];
2858       }
2859       for (l=0; l<nzB; l++) {
2860         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2861           vals[lwrite]   = aworkB[l];
2862           cols[lwrite++] = ctmp;
2863         }
2864       }
2865       vals     += ncols;
2866       cols     += ncols;
2867       rptr[i+1] = rptr[i] + ncols;
2868       if (rownz_max < ncols) rownz_max = ncols;
2869     }
2870     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2871   } else { /* only copy matrix values into sbuf_a */
2872     rptr    = sbuf_j;
2873     vals    = sbuf_a;
2874     rptr[0] = 0;
2875     for (i=0; i<rend-rstart; i++) {
2876       row    = i + rstart;
2877       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2878       ncols  = nzA + nzB;
2879       cworkB = b->j + b->i[i];
2880       aworkA = a->a + a->i[i];
2881       aworkB = b->a + b->i[i];
2882       lwrite = 0;
2883       for (l=0; l<nzB; l++) {
2884         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2885       }
2886       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2887       for (l=0; l<nzB; l++) {
2888         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2889       }
2890       vals     += ncols;
2891       rptr[i+1] = rptr[i] + ncols;
2892     }
2893   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2894 
2895   /* send nzlocal to others, and recv other's nzlocal */
2896   /*--------------------------------------------------*/
2897   if (reuse == MAT_INITIAL_MATRIX) {
2898     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2899 
2900     s_waits2 = s_waits3 + nsends;
2901     s_waits1 = s_waits2 + nsends;
2902     r_waits1 = s_waits1 + nsends;
2903     r_waits2 = r_waits1 + nrecvs;
2904     r_waits3 = r_waits2 + nrecvs;
2905   } else {
2906     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2907 
2908     r_waits3 = s_waits3 + nsends;
2909   }
2910 
2911   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2912   if (reuse == MAT_INITIAL_MATRIX) {
2913     /* get new tags to keep the communication clean */
2914     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2915     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2916     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2917 
2918     /* post receives of other's nzlocal */
2919     for (i=0; i<nrecvs; i++) {
2920       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2921     }
2922     /* send nzlocal to others */
2923     for (i=0; i<nsends; i++) {
2924       sbuf_nz[i] = nzlocal;
2925       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2926     }
2927     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2928     count = nrecvs;
2929     while (count) {
2930       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2931 
2932       recv_rank[imdex] = recv_status.MPI_SOURCE;
2933       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2934       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2935 
2936       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2937 
2938       rbuf_nz[imdex] += i + 2;
2939 
2940       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2941       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2942       count--;
2943     }
2944     /* wait on sends of nzlocal */
2945     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2946     /* send mat->i,j to others, and recv from other's */
2947     /*------------------------------------------------*/
2948     for (i=0; i<nsends; i++) {
2949       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2950       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2951     }
2952     /* wait on receives of mat->i,j */
2953     /*------------------------------*/
2954     count = nrecvs;
2955     while (count) {
2956       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2957       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2958       count--;
2959     }
2960     /* wait on sends of mat->i,j */
2961     /*---------------------------*/
2962     if (nsends) {
2963       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2964     }
2965   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2966 
2967   /* post receives, send and receive mat->a */
2968   /*----------------------------------------*/
2969   for (imdex=0; imdex<nrecvs; imdex++) {
2970     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2971   }
2972   for (i=0; i<nsends; i++) {
2973     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2974   }
2975   count = nrecvs;
2976   while (count) {
2977     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2978     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2979     count--;
2980   }
2981   if (nsends) {
2982     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2983   }
2984 
2985   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2986 
2987   /* create redundant matrix */
2988   /*-------------------------*/
2989   if (reuse == MAT_INITIAL_MATRIX) {
2990     const PetscInt *range;
2991     PetscInt       rstart_sub,rend_sub,mloc_sub;
2992 
2993     /* compute rownz_max for preallocation */
2994     for (imdex=0; imdex<nrecvs; imdex++) {
2995       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2996       rptr = rbuf_j[imdex];
2997       for (i=0; i<j; i++) {
2998         ncols = rptr[i+1] - rptr[i];
2999         if (rownz_max < ncols) rownz_max = ncols;
3000       }
3001     }
3002 
3003     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
3004 
3005     /* get local size of redundant matrix
3006        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
3007     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
3008     rstart_sub = range[nsubcomm*subrank];
3009     if (subrank+1 < subsize) { /* not the last proc in subcomm */
3010       rend_sub = range[nsubcomm*(subrank+1)];
3011     } else {
3012       rend_sub = mat->rmap->N;
3013     }
3014     mloc_sub = rend_sub - rstart_sub;
3015 
3016     if (M == N) {
3017       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3018     } else { /* non-square matrix */
3019       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
3020     }
3021     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
3022     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
3023     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
3024     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
3025   } else {
3026     C = *matredundant;
3027   }
3028 
3029   /* insert local matrix entries */
3030   rptr = sbuf_j;
3031   cols = sbuf_j + rend-rstart + 1;
3032   vals = sbuf_a;
3033   for (i=0; i<rend-rstart; i++) {
3034     row   = i + rstart;
3035     ncols = rptr[i+1] - rptr[i];
3036     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3037     vals += ncols;
3038     cols += ncols;
3039   }
3040   /* insert received matrix entries */
3041   for (imdex=0; imdex<nrecvs; imdex++) {
3042     rstart = rowrange[recv_rank[imdex]];
3043     rend   = rowrange[recv_rank[imdex]+1];
3044     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
3045     rptr   = rbuf_j[imdex];
3046     cols   = rbuf_j[imdex] + rend-rstart + 1;
3047     vals   = rbuf_a[imdex];
3048     for (i=0; i<rend-rstart; i++) {
3049       row   = i + rstart;
3050       ncols = rptr[i+1] - rptr[i];
3051       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3052       vals += ncols;
3053       cols += ncols;
3054     }
3055   }
3056   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3058 
3059   if (reuse == MAT_INITIAL_MATRIX) {
3060     *matredundant = C;
3061 
3062     /* create a supporting struct and attach it to C for reuse */
3063     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3064     if (subsize == 1) {
3065       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
3066       c->redundant = redund;
3067     } else {
3068       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
3069       c->redundant = redund;
3070     }
3071 
3072     redund->nzlocal   = nzlocal;
3073     redund->nsends    = nsends;
3074     redund->nrecvs    = nrecvs;
3075     redund->send_rank = send_rank;
3076     redund->recv_rank = recv_rank;
3077     redund->sbuf_nz   = sbuf_nz;
3078     redund->rbuf_nz   = rbuf_nz;
3079     redund->sbuf_j    = sbuf_j;
3080     redund->sbuf_a    = sbuf_a;
3081     redund->rbuf_j    = rbuf_j;
3082     redund->rbuf_a    = rbuf_a;
3083     redund->psubcomm  = NULL;
3084 
3085     redund->Destroy = C->ops->destroy;
3086     C->ops->destroy = MatDestroy_MatRedundant;
3087   }
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 #undef __FUNCT__
3092 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
3093 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
3094 {
3095   PetscErrorCode ierr;
3096 
3097   PetscFunctionBegin;
3098   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
3099     MPI_Comm       comm;
3100     PetscSubcomm   psubcomm;
3101     PetscMPIInt    size,subsize;
3102 
3103     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3104     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3105     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
3106     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
3107     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
3108     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
3109 
3110     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3111 
3112     /* free psubcomm in MatDestroy_MatRedundant() */
3113     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3114     if (subsize == 1) {
3115       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3116       c->redundant->psubcomm = psubcomm;
3117     } else {
3118       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3119       c->redundant->psubcomm = psubcomm ;
3120     }
3121   } else {
3122     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
3123   }
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3129 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3130 {
3131   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3132   PetscErrorCode ierr;
3133   PetscInt       i,*idxb = 0;
3134   PetscScalar    *va,*vb;
3135   Vec            vtmp;
3136 
3137   PetscFunctionBegin;
3138   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3139   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3140   if (idx) {
3141     for (i=0; i<A->rmap->n; i++) {
3142       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3143     }
3144   }
3145 
3146   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3147   if (idx) {
3148     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3149   }
3150   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3151   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3152 
3153   for (i=0; i<A->rmap->n; i++) {
3154     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3155       va[i] = vb[i];
3156       if (idx) idx[i] = a->garray[idxb[i]];
3157     }
3158   }
3159 
3160   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3161   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3162   ierr = PetscFree(idxb);CHKERRQ(ierr);
3163   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 #undef __FUNCT__
3168 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3169 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3170 {
3171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3172   PetscErrorCode ierr;
3173   PetscInt       i,*idxb = 0;
3174   PetscScalar    *va,*vb;
3175   Vec            vtmp;
3176 
3177   PetscFunctionBegin;
3178   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3179   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3180   if (idx) {
3181     for (i=0; i<A->cmap->n; i++) {
3182       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3183     }
3184   }
3185 
3186   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3187   if (idx) {
3188     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3189   }
3190   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3191   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3192 
3193   for (i=0; i<A->rmap->n; i++) {
3194     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3195       va[i] = vb[i];
3196       if (idx) idx[i] = a->garray[idxb[i]];
3197     }
3198   }
3199 
3200   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3201   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3202   ierr = PetscFree(idxb);CHKERRQ(ierr);
3203   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3209 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3210 {
3211   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3212   PetscInt       n      = A->rmap->n;
3213   PetscInt       cstart = A->cmap->rstart;
3214   PetscInt       *cmap  = mat->garray;
3215   PetscInt       *diagIdx, *offdiagIdx;
3216   Vec            diagV, offdiagV;
3217   PetscScalar    *a, *diagA, *offdiagA;
3218   PetscInt       r;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3223   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3224   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3225   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3226   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3227   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3228   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3229   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3230   for (r = 0; r < n; ++r) {
3231     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3232       a[r]   = diagA[r];
3233       idx[r] = cstart + diagIdx[r];
3234     } else {
3235       a[r]   = offdiagA[r];
3236       idx[r] = cmap[offdiagIdx[r]];
3237     }
3238   }
3239   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3240   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3241   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3242   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3243   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3244   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3250 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3251 {
3252   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3253   PetscInt       n      = A->rmap->n;
3254   PetscInt       cstart = A->cmap->rstart;
3255   PetscInt       *cmap  = mat->garray;
3256   PetscInt       *diagIdx, *offdiagIdx;
3257   Vec            diagV, offdiagV;
3258   PetscScalar    *a, *diagA, *offdiagA;
3259   PetscInt       r;
3260   PetscErrorCode ierr;
3261 
3262   PetscFunctionBegin;
3263   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3264   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3265   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3266   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3267   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3268   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3269   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3270   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3271   for (r = 0; r < n; ++r) {
3272     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3273       a[r]   = diagA[r];
3274       idx[r] = cstart + diagIdx[r];
3275     } else {
3276       a[r]   = offdiagA[r];
3277       idx[r] = cmap[offdiagIdx[r]];
3278     }
3279   }
3280   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3281   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3282   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3283   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3284   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3285   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3291 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3292 {
3293   PetscErrorCode ierr;
3294   Mat            *dummy;
3295 
3296   PetscFunctionBegin;
3297   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3298   *newmat = *dummy;
3299   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3304 
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3307 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3308 {
3309   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3310   PetscErrorCode ierr;
3311 
3312   PetscFunctionBegin;
3313   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3319 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3320 {
3321   PetscErrorCode ierr;
3322   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3323 
3324   PetscFunctionBegin;
3325   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3326   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3327   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 /* -------------------------------------------------------------------*/
3333 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3334                                        MatGetRow_MPIAIJ,
3335                                        MatRestoreRow_MPIAIJ,
3336                                        MatMult_MPIAIJ,
3337                                 /* 4*/ MatMultAdd_MPIAIJ,
3338                                        MatMultTranspose_MPIAIJ,
3339                                        MatMultTransposeAdd_MPIAIJ,
3340 #if defined(PETSC_HAVE_PBGL)
3341                                        MatSolve_MPIAIJ,
3342 #else
3343                                        0,
3344 #endif
3345                                        0,
3346                                        0,
3347                                 /*10*/ 0,
3348                                        0,
3349                                        0,
3350                                        MatSOR_MPIAIJ,
3351                                        MatTranspose_MPIAIJ,
3352                                 /*15*/ MatGetInfo_MPIAIJ,
3353                                        MatEqual_MPIAIJ,
3354                                        MatGetDiagonal_MPIAIJ,
3355                                        MatDiagonalScale_MPIAIJ,
3356                                        MatNorm_MPIAIJ,
3357                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3358                                        MatAssemblyEnd_MPIAIJ,
3359                                        MatSetOption_MPIAIJ,
3360                                        MatZeroEntries_MPIAIJ,
3361                                 /*24*/ MatZeroRows_MPIAIJ,
3362                                        0,
3363 #if defined(PETSC_HAVE_PBGL)
3364                                        0,
3365 #else
3366                                        0,
3367 #endif
3368                                        0,
3369                                        0,
3370                                 /*29*/ MatSetUp_MPIAIJ,
3371 #if defined(PETSC_HAVE_PBGL)
3372                                        0,
3373 #else
3374                                        0,
3375 #endif
3376                                        0,
3377                                        0,
3378                                        0,
3379                                 /*34*/ MatDuplicate_MPIAIJ,
3380                                        0,
3381                                        0,
3382                                        0,
3383                                        0,
3384                                 /*39*/ MatAXPY_MPIAIJ,
3385                                        MatGetSubMatrices_MPIAIJ,
3386                                        MatIncreaseOverlap_MPIAIJ,
3387                                        MatGetValues_MPIAIJ,
3388                                        MatCopy_MPIAIJ,
3389                                 /*44*/ MatGetRowMax_MPIAIJ,
3390                                        MatScale_MPIAIJ,
3391                                        0,
3392                                        0,
3393                                        MatZeroRowsColumns_MPIAIJ,
3394                                 /*49*/ MatSetRandom_MPIAIJ,
3395                                        0,
3396                                        0,
3397                                        0,
3398                                        0,
3399                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3400                                        0,
3401                                        MatSetUnfactored_MPIAIJ,
3402                                        MatPermute_MPIAIJ,
3403                                        0,
3404                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3405                                        MatDestroy_MPIAIJ,
3406                                        MatView_MPIAIJ,
3407                                        0,
3408                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3409                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3410                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3411                                        0,
3412                                        0,
3413                                        0,
3414                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3415                                        MatGetRowMinAbs_MPIAIJ,
3416                                        0,
3417                                        MatSetColoring_MPIAIJ,
3418                                        0,
3419                                        MatSetValuesAdifor_MPIAIJ,
3420                                 /*75*/ MatFDColoringApply_AIJ,
3421                                        0,
3422                                        0,
3423                                        0,
3424                                        MatFindZeroDiagonals_MPIAIJ,
3425                                 /*80*/ 0,
3426                                        0,
3427                                        0,
3428                                 /*83*/ MatLoad_MPIAIJ,
3429                                        0,
3430                                        0,
3431                                        0,
3432                                        0,
3433                                        0,
3434                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3435                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3436                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3437                                        MatPtAP_MPIAIJ_MPIAIJ,
3438                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3439                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3440                                        0,
3441                                        0,
3442                                        0,
3443                                        0,
3444                                 /*99*/ 0,
3445                                        0,
3446                                        0,
3447                                        MatConjugate_MPIAIJ,
3448                                        0,
3449                                 /*104*/MatSetValuesRow_MPIAIJ,
3450                                        MatRealPart_MPIAIJ,
3451                                        MatImaginaryPart_MPIAIJ,
3452                                        0,
3453                                        0,
3454                                 /*109*/0,
3455                                        MatGetRedundantMatrix_MPIAIJ,
3456                                        MatGetRowMin_MPIAIJ,
3457                                        0,
3458                                        0,
3459                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3460                                        0,
3461                                        0,
3462                                        0,
3463                                        0,
3464                                 /*119*/0,
3465                                        0,
3466                                        0,
3467                                        0,
3468                                        MatGetMultiProcBlock_MPIAIJ,
3469                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3470                                        MatGetColumnNorms_MPIAIJ,
3471                                        MatInvertBlockDiagonal_MPIAIJ,
3472                                        0,
3473                                        MatGetSubMatricesParallel_MPIAIJ,
3474                                 /*129*/0,
3475                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3476                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3477                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3478                                        0,
3479                                 /*134*/0,
3480                                        0,
3481                                        0,
3482                                        0,
3483                                        0,
3484                                 /*139*/0,
3485                                        0
3486 };
3487 
3488 /* ----------------------------------------------------------------------------------------*/
3489 
3490 #undef __FUNCT__
3491 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3492 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3493 {
3494   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3495   PetscErrorCode ierr;
3496 
3497   PetscFunctionBegin;
3498   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3499   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3505 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3506 {
3507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3508   PetscErrorCode ierr;
3509 
3510   PetscFunctionBegin;
3511   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3512   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3518 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3519 {
3520   Mat_MPIAIJ     *b;
3521   PetscErrorCode ierr;
3522 
3523   PetscFunctionBegin;
3524   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3525   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3526   b = (Mat_MPIAIJ*)B->data;
3527 
3528   if (!B->preallocated) {
3529     /* Explicitly create 2 MATSEQAIJ matrices. */
3530     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3531     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3532     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3533     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3534     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3535     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3536     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3537     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3538     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3539     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3540   }
3541 
3542   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3543   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3544   B->preallocated = PETSC_TRUE;
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3550 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3551 {
3552   Mat            mat;
3553   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3554   PetscErrorCode ierr;
3555 
3556   PetscFunctionBegin;
3557   *newmat = 0;
3558   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3559   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3560   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3561   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3562   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3563   a       = (Mat_MPIAIJ*)mat->data;
3564 
3565   mat->factortype   = matin->factortype;
3566   mat->rmap->bs     = matin->rmap->bs;
3567   mat->cmap->bs     = matin->cmap->bs;
3568   mat->assembled    = PETSC_TRUE;
3569   mat->insertmode   = NOT_SET_VALUES;
3570   mat->preallocated = PETSC_TRUE;
3571 
3572   a->size         = oldmat->size;
3573   a->rank         = oldmat->rank;
3574   a->donotstash   = oldmat->donotstash;
3575   a->roworiented  = oldmat->roworiented;
3576   a->rowindices   = 0;
3577   a->rowvalues    = 0;
3578   a->getrowactive = PETSC_FALSE;
3579 
3580   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3581   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3582 
3583   if (oldmat->colmap) {
3584 #if defined(PETSC_USE_CTABLE)
3585     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3586 #else
3587     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3588     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3589     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3590 #endif
3591   } else a->colmap = 0;
3592   if (oldmat->garray) {
3593     PetscInt len;
3594     len  = oldmat->B->cmap->n;
3595     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3596     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3597     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3598   } else a->garray = 0;
3599 
3600   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3601   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3602   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3603   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3604   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3605   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3606   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3607   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3608   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3609   *newmat = mat;
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 
3614 
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatLoad_MPIAIJ"
3617 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3618 {
3619   PetscScalar    *vals,*svals;
3620   MPI_Comm       comm;
3621   PetscErrorCode ierr;
3622   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3623   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3624   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3625   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3626   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3627   int            fd;
3628   PetscInt       bs = 1;
3629 
3630   PetscFunctionBegin;
3631   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3632   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3633   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3634   if (!rank) {
3635     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3636     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3637     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3638   }
3639 
3640   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3641   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3642   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3643 
3644   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3645 
3646   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3647   M    = header[1]; N = header[2];
3648   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3649   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3650   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3651 
3652   /* If global sizes are set, check if they are consistent with that given in the file */
3653   if (sizesset) {
3654     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3655   }
3656   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3657   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3658 
3659   /* determine ownership of all (block) rows */
3660   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3661   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3662   else m = newMat->rmap->n; /* Set by user */
3663 
3664   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3665   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3666 
3667   /* First process needs enough room for process with most rows */
3668   if (!rank) {
3669     mmax = rowners[1];
3670     for (i=2; i<=size; i++) {
3671       mmax = PetscMax(mmax, rowners[i]);
3672     }
3673   } else mmax = -1;             /* unused, but compilers complain */
3674 
3675   rowners[0] = 0;
3676   for (i=2; i<=size; i++) {
3677     rowners[i] += rowners[i-1];
3678   }
3679   rstart = rowners[rank];
3680   rend   = rowners[rank+1];
3681 
3682   /* distribute row lengths to all processors */
3683   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3684   if (!rank) {
3685     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3686     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3687     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3688     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3689     for (j=0; j<m; j++) {
3690       procsnz[0] += ourlens[j];
3691     }
3692     for (i=1; i<size; i++) {
3693       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3694       /* calculate the number of nonzeros on each processor */
3695       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3696         procsnz[i] += rowlengths[j];
3697       }
3698       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3699     }
3700     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3701   } else {
3702     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3703   }
3704 
3705   if (!rank) {
3706     /* determine max buffer needed and allocate it */
3707     maxnz = 0;
3708     for (i=0; i<size; i++) {
3709       maxnz = PetscMax(maxnz,procsnz[i]);
3710     }
3711     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3712 
3713     /* read in my part of the matrix column indices  */
3714     nz   = procsnz[0];
3715     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3716     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3717 
3718     /* read in every one elses and ship off */
3719     for (i=1; i<size; i++) {
3720       nz   = procsnz[i];
3721       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3722       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3723     }
3724     ierr = PetscFree(cols);CHKERRQ(ierr);
3725   } else {
3726     /* determine buffer space needed for message */
3727     nz = 0;
3728     for (i=0; i<m; i++) {
3729       nz += ourlens[i];
3730     }
3731     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3732 
3733     /* receive message of column indices*/
3734     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3735   }
3736 
3737   /* determine column ownership if matrix is not square */
3738   if (N != M) {
3739     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3740     else n = newMat->cmap->n;
3741     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3742     cstart = cend - n;
3743   } else {
3744     cstart = rstart;
3745     cend   = rend;
3746     n      = cend - cstart;
3747   }
3748 
3749   /* loop over local rows, determining number of off diagonal entries */
3750   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3751   jj   = 0;
3752   for (i=0; i<m; i++) {
3753     for (j=0; j<ourlens[i]; j++) {
3754       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3755       jj++;
3756     }
3757   }
3758 
3759   for (i=0; i<m; i++) {
3760     ourlens[i] -= offlens[i];
3761   }
3762   if (!sizesset) {
3763     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3764   }
3765 
3766   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3767 
3768   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3769 
3770   for (i=0; i<m; i++) {
3771     ourlens[i] += offlens[i];
3772   }
3773 
3774   if (!rank) {
3775     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3776 
3777     /* read in my part of the matrix numerical values  */
3778     nz   = procsnz[0];
3779     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3780 
3781     /* insert into matrix */
3782     jj      = rstart;
3783     smycols = mycols;
3784     svals   = vals;
3785     for (i=0; i<m; i++) {
3786       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3787       smycols += ourlens[i];
3788       svals   += ourlens[i];
3789       jj++;
3790     }
3791 
3792     /* read in other processors and ship out */
3793     for (i=1; i<size; i++) {
3794       nz   = procsnz[i];
3795       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3796       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3797     }
3798     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3799   } else {
3800     /* receive numeric values */
3801     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3802 
3803     /* receive message of values*/
3804     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3805 
3806     /* insert into matrix */
3807     jj      = rstart;
3808     smycols = mycols;
3809     svals   = vals;
3810     for (i=0; i<m; i++) {
3811       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3812       smycols += ourlens[i];
3813       svals   += ourlens[i];
3814       jj++;
3815     }
3816   }
3817   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3818   ierr = PetscFree(vals);CHKERRQ(ierr);
3819   ierr = PetscFree(mycols);CHKERRQ(ierr);
3820   ierr = PetscFree(rowners);CHKERRQ(ierr);
3821   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3828 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3829 {
3830   PetscErrorCode ierr;
3831   IS             iscol_local;
3832   PetscInt       csize;
3833 
3834   PetscFunctionBegin;
3835   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3836   if (call == MAT_REUSE_MATRIX) {
3837     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3838     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3839   } else {
3840     PetscInt cbs;
3841     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3842     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3843     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3844   }
3845   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3846   if (call == MAT_INITIAL_MATRIX) {
3847     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3848     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3849   }
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3856 /*
3857     Not great since it makes two copies of the submatrix, first an SeqAIJ
3858   in local and then by concatenating the local matrices the end result.
3859   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3860 
3861   Note: This requires a sequential iscol with all indices.
3862 */
3863 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3864 {
3865   PetscErrorCode ierr;
3866   PetscMPIInt    rank,size;
3867   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3868   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3869   PetscBool      allcolumns, colflag;
3870   Mat            M,Mreuse;
3871   MatScalar      *vwork,*aa;
3872   MPI_Comm       comm;
3873   Mat_SeqAIJ     *aij;
3874 
3875   PetscFunctionBegin;
3876   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3879 
3880   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3881   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3882   if (colflag && ncol == mat->cmap->N) {
3883     allcolumns = PETSC_TRUE;
3884   } else {
3885     allcolumns = PETSC_FALSE;
3886   }
3887   if (call ==  MAT_REUSE_MATRIX) {
3888     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3889     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3890     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3891   } else {
3892     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3893   }
3894 
3895   /*
3896       m - number of local rows
3897       n - number of columns (same on all processors)
3898       rstart - first row in new global matrix generated
3899   */
3900   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3901   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3902   if (call == MAT_INITIAL_MATRIX) {
3903     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3904     ii  = aij->i;
3905     jj  = aij->j;
3906 
3907     /*
3908         Determine the number of non-zeros in the diagonal and off-diagonal
3909         portions of the matrix in order to do correct preallocation
3910     */
3911 
3912     /* first get start and end of "diagonal" columns */
3913     if (csize == PETSC_DECIDE) {
3914       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3915       if (mglobal == n) { /* square matrix */
3916         nlocal = m;
3917       } else {
3918         nlocal = n/size + ((n % size) > rank);
3919       }
3920     } else {
3921       nlocal = csize;
3922     }
3923     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3924     rstart = rend - nlocal;
3925     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3926 
3927     /* next, compute all the lengths */
3928     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3929     olens = dlens + m;
3930     for (i=0; i<m; i++) {
3931       jend = ii[i+1] - ii[i];
3932       olen = 0;
3933       dlen = 0;
3934       for (j=0; j<jend; j++) {
3935         if (*jj < rstart || *jj >= rend) olen++;
3936         else dlen++;
3937         jj++;
3938       }
3939       olens[i] = olen;
3940       dlens[i] = dlen;
3941     }
3942     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3943     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3944     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3945     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3946     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3947     ierr = PetscFree(dlens);CHKERRQ(ierr);
3948   } else {
3949     PetscInt ml,nl;
3950 
3951     M    = *newmat;
3952     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3953     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3954     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3955     /*
3956          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3957        rather than the slower MatSetValues().
3958     */
3959     M->was_assembled = PETSC_TRUE;
3960     M->assembled     = PETSC_FALSE;
3961   }
3962   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3963   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3964   ii   = aij->i;
3965   jj   = aij->j;
3966   aa   = aij->a;
3967   for (i=0; i<m; i++) {
3968     row   = rstart + i;
3969     nz    = ii[i+1] - ii[i];
3970     cwork = jj;     jj += nz;
3971     vwork = aa;     aa += nz;
3972     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3973   }
3974 
3975   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977   *newmat = M;
3978 
3979   /* save submatrix used in processor for next request */
3980   if (call ==  MAT_INITIAL_MATRIX) {
3981     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3982     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3983   }
3984   PetscFunctionReturn(0);
3985 }
3986 
3987 #undef __FUNCT__
3988 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3989 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3990 {
3991   PetscInt       m,cstart, cend,j,nnz,i,d;
3992   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3993   const PetscInt *JJ;
3994   PetscScalar    *values;
3995   PetscErrorCode ierr;
3996 
3997   PetscFunctionBegin;
3998   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3999 
4000   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
4001   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
4002   m      = B->rmap->n;
4003   cstart = B->cmap->rstart;
4004   cend   = B->cmap->rend;
4005   rstart = B->rmap->rstart;
4006 
4007   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
4008 
4009 #if defined(PETSC_USE_DEBUGGING)
4010   for (i=0; i<m; i++) {
4011     nnz = Ii[i+1]- Ii[i];
4012     JJ  = J + Ii[i];
4013     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4014     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
4015     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4016   }
4017 #endif
4018 
4019   for (i=0; i<m; i++) {
4020     nnz     = Ii[i+1]- Ii[i];
4021     JJ      = J + Ii[i];
4022     nnz_max = PetscMax(nnz_max,nnz);
4023     d       = 0;
4024     for (j=0; j<nnz; j++) {
4025       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4026     }
4027     d_nnz[i] = d;
4028     o_nnz[i] = nnz - d;
4029   }
4030   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4031   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4032 
4033   if (v) values = (PetscScalar*)v;
4034   else {
4035     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
4036     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
4037   }
4038 
4039   for (i=0; i<m; i++) {
4040     ii   = i + rstart;
4041     nnz  = Ii[i+1]- Ii[i];
4042     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
4043   }
4044   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4045   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4046 
4047   if (!v) {
4048     ierr = PetscFree(values);CHKERRQ(ierr);
4049   }
4050   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 #undef __FUNCT__
4055 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
4056 /*@
4057    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4058    (the default parallel PETSc format).
4059 
4060    Collective on MPI_Comm
4061 
4062    Input Parameters:
4063 +  B - the matrix
4064 .  i - the indices into j for the start of each local row (starts with zero)
4065 .  j - the column indices for each local row (starts with zero)
4066 -  v - optional values in the matrix
4067 
4068    Level: developer
4069 
4070    Notes:
4071        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4072      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4073      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4074 
4075        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4076 
4077        The format which is used for the sparse matrix input, is equivalent to a
4078     row-major ordering.. i.e for the following matrix, the input data expected is
4079     as shown:
4080 
4081         1 0 0
4082         2 0 3     P0
4083        -------
4084         4 5 6     P1
4085 
4086      Process0 [P0]: rows_owned=[0,1]
4087         i =  {0,1,3}  [size = nrow+1  = 2+1]
4088         j =  {0,0,2}  [size = nz = 6]
4089         v =  {1,2,3}  [size = nz = 6]
4090 
4091      Process1 [P1]: rows_owned=[2]
4092         i =  {0,3}    [size = nrow+1  = 1+1]
4093         j =  {0,1,2}  [size = nz = 6]
4094         v =  {4,5,6}  [size = nz = 6]
4095 
4096 .keywords: matrix, aij, compressed row, sparse, parallel
4097 
4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4099           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4100 @*/
4101 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4102 {
4103   PetscErrorCode ierr;
4104 
4105   PetscFunctionBegin;
4106   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4107   PetscFunctionReturn(0);
4108 }
4109 
4110 #undef __FUNCT__
4111 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4112 /*@C
4113    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4114    (the default parallel PETSc format).  For good matrix assembly performance
4115    the user should preallocate the matrix storage by setting the parameters
4116    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4117    performance can be increased by more than a factor of 50.
4118 
4119    Collective on MPI_Comm
4120 
4121    Input Parameters:
4122 +  A - the matrix
4123 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4124            (same value is used for all local rows)
4125 .  d_nnz - array containing the number of nonzeros in the various rows of the
4126            DIAGONAL portion of the local submatrix (possibly different for each row)
4127            or NULL, if d_nz is used to specify the nonzero structure.
4128            The size of this array is equal to the number of local rows, i.e 'm'.
4129            For matrices that will be factored, you must leave room for (and set)
4130            the diagonal entry even if it is zero.
4131 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4132            submatrix (same value is used for all local rows).
4133 -  o_nnz - array containing the number of nonzeros in the various rows of the
4134            OFF-DIAGONAL portion of the local submatrix (possibly different for
4135            each row) or NULL, if o_nz is used to specify the nonzero
4136            structure. The size of this array is equal to the number
4137            of local rows, i.e 'm'.
4138 
4139    If the *_nnz parameter is given then the *_nz parameter is ignored
4140 
4141    The AIJ format (also called the Yale sparse matrix format or
4142    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4143    storage.  The stored row and column indices begin with zero.
4144    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4145 
4146    The parallel matrix is partitioned such that the first m0 rows belong to
4147    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4148    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4149 
4150    The DIAGONAL portion of the local submatrix of a processor can be defined
4151    as the submatrix which is obtained by extraction the part corresponding to
4152    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4153    first row that belongs to the processor, r2 is the last row belonging to
4154    the this processor, and c1-c2 is range of indices of the local part of a
4155    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4156    common case of a square matrix, the row and column ranges are the same and
4157    the DIAGONAL part is also square. The remaining portion of the local
4158    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4159 
4160    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4161 
4162    You can call MatGetInfo() to get information on how effective the preallocation was;
4163    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4164    You can also run with the option -info and look for messages with the string
4165    malloc in them to see if additional memory allocation was needed.
4166 
4167    Example usage:
4168 
4169    Consider the following 8x8 matrix with 34 non-zero values, that is
4170    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4171    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4172    as follows:
4173 
4174 .vb
4175             1  2  0  |  0  3  0  |  0  4
4176     Proc0   0  5  6  |  7  0  0  |  8  0
4177             9  0 10  | 11  0  0  | 12  0
4178     -------------------------------------
4179            13  0 14  | 15 16 17  |  0  0
4180     Proc1   0 18  0  | 19 20 21  |  0  0
4181             0  0  0  | 22 23  0  | 24  0
4182     -------------------------------------
4183     Proc2  25 26 27  |  0  0 28  | 29  0
4184            30  0  0  | 31 32 33  |  0 34
4185 .ve
4186 
4187    This can be represented as a collection of submatrices as:
4188 
4189 .vb
4190       A B C
4191       D E F
4192       G H I
4193 .ve
4194 
4195    Where the submatrices A,B,C are owned by proc0, D,E,F are
4196    owned by proc1, G,H,I are owned by proc2.
4197 
4198    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4199    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4200    The 'M','N' parameters are 8,8, and have the same values on all procs.
4201 
4202    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4203    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4204    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4205    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4206    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4207    matrix, ans [DF] as another SeqAIJ matrix.
4208 
4209    When d_nz, o_nz parameters are specified, d_nz storage elements are
4210    allocated for every row of the local diagonal submatrix, and o_nz
4211    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4212    One way to choose d_nz and o_nz is to use the max nonzerors per local
4213    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4214    In this case, the values of d_nz,o_nz are:
4215 .vb
4216      proc0 : dnz = 2, o_nz = 2
4217      proc1 : dnz = 3, o_nz = 2
4218      proc2 : dnz = 1, o_nz = 4
4219 .ve
4220    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4221    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4222    for proc3. i.e we are using 12+15+10=37 storage locations to store
4223    34 values.
4224 
4225    When d_nnz, o_nnz parameters are specified, the storage is specified
4226    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4227    In the above case the values for d_nnz,o_nnz are:
4228 .vb
4229      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4230      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4231      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4232 .ve
4233    Here the space allocated is sum of all the above values i.e 34, and
4234    hence pre-allocation is perfect.
4235 
4236    Level: intermediate
4237 
4238 .keywords: matrix, aij, compressed row, sparse, parallel
4239 
4240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4241           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4242 @*/
4243 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4244 {
4245   PetscErrorCode ierr;
4246 
4247   PetscFunctionBegin;
4248   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4249   PetscValidType(B,1);
4250   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4256 /*@
4257      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4258          CSR format the local rows.
4259 
4260    Collective on MPI_Comm
4261 
4262    Input Parameters:
4263 +  comm - MPI communicator
4264 .  m - number of local rows (Cannot be PETSC_DECIDE)
4265 .  n - This value should be the same as the local size used in creating the
4266        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4267        calculated if N is given) For square matrices n is almost always m.
4268 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4269 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4270 .   i - row indices
4271 .   j - column indices
4272 -   a - matrix values
4273 
4274    Output Parameter:
4275 .   mat - the matrix
4276 
4277    Level: intermediate
4278 
4279    Notes:
4280        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4281      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4282      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4283 
4284        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4285 
4286        The format which is used for the sparse matrix input, is equivalent to a
4287     row-major ordering.. i.e for the following matrix, the input data expected is
4288     as shown:
4289 
4290         1 0 0
4291         2 0 3     P0
4292        -------
4293         4 5 6     P1
4294 
4295      Process0 [P0]: rows_owned=[0,1]
4296         i =  {0,1,3}  [size = nrow+1  = 2+1]
4297         j =  {0,0,2}  [size = nz = 6]
4298         v =  {1,2,3}  [size = nz = 6]
4299 
4300      Process1 [P1]: rows_owned=[2]
4301         i =  {0,3}    [size = nrow+1  = 1+1]
4302         j =  {0,1,2}  [size = nz = 6]
4303         v =  {4,5,6}  [size = nz = 6]
4304 
4305 .keywords: matrix, aij, compressed row, sparse, parallel
4306 
4307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4308           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4309 @*/
4310 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4311 {
4312   PetscErrorCode ierr;
4313 
4314   PetscFunctionBegin;
4315   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4316   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4317   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4318   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4319   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4320   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4321   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatCreateAIJ"
4327 /*@C
4328    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4329    (the default parallel PETSc format).  For good matrix assembly performance
4330    the user should preallocate the matrix storage by setting the parameters
4331    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4332    performance can be increased by more than a factor of 50.
4333 
4334    Collective on MPI_Comm
4335 
4336    Input Parameters:
4337 +  comm - MPI communicator
4338 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4339            This value should be the same as the local size used in creating the
4340            y vector for the matrix-vector product y = Ax.
4341 .  n - This value should be the same as the local size used in creating the
4342        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4343        calculated if N is given) For square matrices n is almost always m.
4344 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4345 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4346 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4347            (same value is used for all local rows)
4348 .  d_nnz - array containing the number of nonzeros in the various rows of the
4349            DIAGONAL portion of the local submatrix (possibly different for each row)
4350            or NULL, if d_nz is used to specify the nonzero structure.
4351            The size of this array is equal to the number of local rows, i.e 'm'.
4352 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4353            submatrix (same value is used for all local rows).
4354 -  o_nnz - array containing the number of nonzeros in the various rows of the
4355            OFF-DIAGONAL portion of the local submatrix (possibly different for
4356            each row) or NULL, if o_nz is used to specify the nonzero
4357            structure. The size of this array is equal to the number
4358            of local rows, i.e 'm'.
4359 
4360    Output Parameter:
4361 .  A - the matrix
4362 
4363    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4364    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4365    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4366 
4367    Notes:
4368    If the *_nnz parameter is given then the *_nz parameter is ignored
4369 
4370    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4371    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4372    storage requirements for this matrix.
4373 
4374    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4375    processor than it must be used on all processors that share the object for
4376    that argument.
4377 
4378    The user MUST specify either the local or global matrix dimensions
4379    (possibly both).
4380 
4381    The parallel matrix is partitioned across processors such that the
4382    first m0 rows belong to process 0, the next m1 rows belong to
4383    process 1, the next m2 rows belong to process 2 etc.. where
4384    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4385    values corresponding to [m x N] submatrix.
4386 
4387    The columns are logically partitioned with the n0 columns belonging
4388    to 0th partition, the next n1 columns belonging to the next
4389    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4390 
4391    The DIAGONAL portion of the local submatrix on any given processor
4392    is the submatrix corresponding to the rows and columns m,n
4393    corresponding to the given processor. i.e diagonal matrix on
4394    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4395    etc. The remaining portion of the local submatrix [m x (N-n)]
4396    constitute the OFF-DIAGONAL portion. The example below better
4397    illustrates this concept.
4398 
4399    For a square global matrix we define each processor's diagonal portion
4400    to be its local rows and the corresponding columns (a square submatrix);
4401    each processor's off-diagonal portion encompasses the remainder of the
4402    local matrix (a rectangular submatrix).
4403 
4404    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4405 
4406    When calling this routine with a single process communicator, a matrix of
4407    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4408    type of communicator, use the construction mechanism:
4409      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4410 
4411    By default, this format uses inodes (identical nodes) when possible.
4412    We search for consecutive rows with the same nonzero structure, thereby
4413    reusing matrix information to achieve increased efficiency.
4414 
4415    Options Database Keys:
4416 +  -mat_no_inode  - Do not use inodes
4417 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4418 -  -mat_aij_oneindex - Internally use indexing starting at 1
4419         rather than 0.  Note that when calling MatSetValues(),
4420         the user still MUST index entries starting at 0!
4421 
4422 
4423    Example usage:
4424 
4425    Consider the following 8x8 matrix with 34 non-zero values, that is
4426    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4427    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4428    as follows:
4429 
4430 .vb
4431             1  2  0  |  0  3  0  |  0  4
4432     Proc0   0  5  6  |  7  0  0  |  8  0
4433             9  0 10  | 11  0  0  | 12  0
4434     -------------------------------------
4435            13  0 14  | 15 16 17  |  0  0
4436     Proc1   0 18  0  | 19 20 21  |  0  0
4437             0  0  0  | 22 23  0  | 24  0
4438     -------------------------------------
4439     Proc2  25 26 27  |  0  0 28  | 29  0
4440            30  0  0  | 31 32 33  |  0 34
4441 .ve
4442 
4443    This can be represented as a collection of submatrices as:
4444 
4445 .vb
4446       A B C
4447       D E F
4448       G H I
4449 .ve
4450 
4451    Where the submatrices A,B,C are owned by proc0, D,E,F are
4452    owned by proc1, G,H,I are owned by proc2.
4453 
4454    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4455    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4456    The 'M','N' parameters are 8,8, and have the same values on all procs.
4457 
4458    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4459    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4460    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4461    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4462    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4463    matrix, ans [DF] as another SeqAIJ matrix.
4464 
4465    When d_nz, o_nz parameters are specified, d_nz storage elements are
4466    allocated for every row of the local diagonal submatrix, and o_nz
4467    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4468    One way to choose d_nz and o_nz is to use the max nonzerors per local
4469    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4470    In this case, the values of d_nz,o_nz are:
4471 .vb
4472      proc0 : dnz = 2, o_nz = 2
4473      proc1 : dnz = 3, o_nz = 2
4474      proc2 : dnz = 1, o_nz = 4
4475 .ve
4476    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4477    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4478    for proc3. i.e we are using 12+15+10=37 storage locations to store
4479    34 values.
4480 
4481    When d_nnz, o_nnz parameters are specified, the storage is specified
4482    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4483    In the above case the values for d_nnz,o_nnz are:
4484 .vb
4485      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4486      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4487      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4488 .ve
4489    Here the space allocated is sum of all the above values i.e 34, and
4490    hence pre-allocation is perfect.
4491 
4492    Level: intermediate
4493 
4494 .keywords: matrix, aij, compressed row, sparse, parallel
4495 
4496 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4497           MPIAIJ, MatCreateMPIAIJWithArrays()
4498 @*/
4499 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4500 {
4501   PetscErrorCode ierr;
4502   PetscMPIInt    size;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4506   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4507   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4508   if (size > 1) {
4509     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4510     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4511   } else {
4512     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4513     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4514   }
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4520 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4521 {
4522   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4523 
4524   PetscFunctionBegin;
4525   *Ad     = a->A;
4526   *Ao     = a->B;
4527   *colmap = a->garray;
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4533 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4534 {
4535   PetscErrorCode ierr;
4536   PetscInt       i;
4537   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4538 
4539   PetscFunctionBegin;
4540   if (coloring->ctype == IS_COLORING_GLOBAL) {
4541     ISColoringValue *allcolors,*colors;
4542     ISColoring      ocoloring;
4543 
4544     /* set coloring for diagonal portion */
4545     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4546 
4547     /* set coloring for off-diagonal portion */
4548     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4549     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4550     for (i=0; i<a->B->cmap->n; i++) {
4551       colors[i] = allcolors[a->garray[i]];
4552     }
4553     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4554     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4555     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4556     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4557   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4558     ISColoringValue *colors;
4559     PetscInt        *larray;
4560     ISColoring      ocoloring;
4561 
4562     /* set coloring for diagonal portion */
4563     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4564     for (i=0; i<a->A->cmap->n; i++) {
4565       larray[i] = i + A->cmap->rstart;
4566     }
4567     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4568     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4569     for (i=0; i<a->A->cmap->n; i++) {
4570       colors[i] = coloring->colors[larray[i]];
4571     }
4572     ierr = PetscFree(larray);CHKERRQ(ierr);
4573     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4574     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4575     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4576 
4577     /* set coloring for off-diagonal portion */
4578     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4579     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4580     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4581     for (i=0; i<a->B->cmap->n; i++) {
4582       colors[i] = coloring->colors[larray[i]];
4583     }
4584     ierr = PetscFree(larray);CHKERRQ(ierr);
4585     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4586     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4587     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4588   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4589   PetscFunctionReturn(0);
4590 }
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4594 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4595 {
4596   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4597   PetscErrorCode ierr;
4598 
4599   PetscFunctionBegin;
4600   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4601   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 #undef __FUNCT__
4606 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4607 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4608 {
4609   PetscErrorCode ierr;
4610   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4611   PetscInt       *indx;
4612 
4613   PetscFunctionBegin;
4614   /* This routine will ONLY return MPIAIJ type matrix */
4615   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4616   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4617   if (n == PETSC_DECIDE) {
4618     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4619   }
4620   /* Check sum(n) = N */
4621   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4622   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4623 
4624   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4625   rstart -= m;
4626 
4627   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4628   for (i=0; i<m; i++) {
4629     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4630     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4631     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4632   }
4633 
4634   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4635   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4636   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4637   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4638   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4639   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4640   PetscFunctionReturn(0);
4641 }
4642 
4643 #undef __FUNCT__
4644 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4645 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4646 {
4647   PetscErrorCode ierr;
4648   PetscInt       m,N,i,rstart,nnz,Ii;
4649   PetscInt       *indx;
4650   PetscScalar    *values;
4651 
4652   PetscFunctionBegin;
4653   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4654   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4655   for (i=0; i<m; i++) {
4656     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4657     Ii   = i + rstart;
4658     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4659     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4660   }
4661   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4662   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 #undef __FUNCT__
4667 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4668 /*@
4669       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4670                  matrices from each processor
4671 
4672     Collective on MPI_Comm
4673 
4674    Input Parameters:
4675 +    comm - the communicators the parallel matrix will live on
4676 .    inmat - the input sequential matrices
4677 .    n - number of local columns (or PETSC_DECIDE)
4678 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4679 
4680    Output Parameter:
4681 .    outmat - the parallel matrix generated
4682 
4683     Level: advanced
4684 
4685    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4686 
4687 @*/
4688 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4689 {
4690   PetscErrorCode ierr;
4691 
4692   PetscFunctionBegin;
4693   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4694   if (scall == MAT_INITIAL_MATRIX) {
4695     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4696   }
4697   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4698   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4699   PetscFunctionReturn(0);
4700 }
4701 
4702 #undef __FUNCT__
4703 #define __FUNCT__ "MatFileSplit"
4704 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4705 {
4706   PetscErrorCode    ierr;
4707   PetscMPIInt       rank;
4708   PetscInt          m,N,i,rstart,nnz;
4709   size_t            len;
4710   const PetscInt    *indx;
4711   PetscViewer       out;
4712   char              *name;
4713   Mat               B;
4714   const PetscScalar *values;
4715 
4716   PetscFunctionBegin;
4717   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4718   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4719   /* Should this be the type of the diagonal block of A? */
4720   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4721   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4722   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4723   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4724   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4725   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4726   for (i=0; i<m; i++) {
4727     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4728     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4729     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4730   }
4731   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4732   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4733 
4734   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4735   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4736   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4737   sprintf(name,"%s.%d",outfile,rank);
4738   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4739   ierr = PetscFree(name);CHKERRQ(ierr);
4740   ierr = MatView(B,out);CHKERRQ(ierr);
4741   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4742   ierr = MatDestroy(&B);CHKERRQ(ierr);
4743   PetscFunctionReturn(0);
4744 }
4745 
4746 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4747 #undef __FUNCT__
4748 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4749 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4750 {
4751   PetscErrorCode      ierr;
4752   Mat_Merge_SeqsToMPI *merge;
4753   PetscContainer      container;
4754 
4755   PetscFunctionBegin;
4756   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4757   if (container) {
4758     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4759     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4760     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4761     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4762     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4763     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4764     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4765     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4766     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4767     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4768     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4769     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4770     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4771     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4772     ierr = PetscFree(merge);CHKERRQ(ierr);
4773     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4774   }
4775   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4776   PetscFunctionReturn(0);
4777 }
4778 
4779 #include <../src/mat/utils/freespace.h>
4780 #include <petscbt.h>
4781 
4782 #undef __FUNCT__
4783 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4784 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4785 {
4786   PetscErrorCode      ierr;
4787   MPI_Comm            comm;
4788   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4789   PetscMPIInt         size,rank,taga,*len_s;
4790   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4791   PetscInt            proc,m;
4792   PetscInt            **buf_ri,**buf_rj;
4793   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4794   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4795   MPI_Request         *s_waits,*r_waits;
4796   MPI_Status          *status;
4797   MatScalar           *aa=a->a;
4798   MatScalar           **abuf_r,*ba_i;
4799   Mat_Merge_SeqsToMPI *merge;
4800   PetscContainer      container;
4801 
4802   PetscFunctionBegin;
4803   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4804   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4805 
4806   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4807   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4808 
4809   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4810   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4811 
4812   bi     = merge->bi;
4813   bj     = merge->bj;
4814   buf_ri = merge->buf_ri;
4815   buf_rj = merge->buf_rj;
4816 
4817   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4818   owners = merge->rowmap->range;
4819   len_s  = merge->len_s;
4820 
4821   /* send and recv matrix values */
4822   /*-----------------------------*/
4823   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4824   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4825 
4826   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4827   for (proc=0,k=0; proc<size; proc++) {
4828     if (!len_s[proc]) continue;
4829     i    = owners[proc];
4830     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4831     k++;
4832   }
4833 
4834   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4835   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4836   ierr = PetscFree(status);CHKERRQ(ierr);
4837 
4838   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4839   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4840 
4841   /* insert mat values of mpimat */
4842   /*----------------------------*/
4843   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4844   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4845 
4846   for (k=0; k<merge->nrecv; k++) {
4847     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4848     nrows       = *(buf_ri_k[k]);
4849     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4850     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4851   }
4852 
4853   /* set values of ba */
4854   m = merge->rowmap->n;
4855   for (i=0; i<m; i++) {
4856     arow = owners[rank] + i;
4857     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4858     bnzi = bi[i+1] - bi[i];
4859     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4860 
4861     /* add local non-zero vals of this proc's seqmat into ba */
4862     anzi   = ai[arow+1] - ai[arow];
4863     aj     = a->j + ai[arow];
4864     aa     = a->a + ai[arow];
4865     nextaj = 0;
4866     for (j=0; nextaj<anzi; j++) {
4867       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4868         ba_i[j] += aa[nextaj++];
4869       }
4870     }
4871 
4872     /* add received vals into ba */
4873     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4874       /* i-th row */
4875       if (i == *nextrow[k]) {
4876         anzi   = *(nextai[k]+1) - *nextai[k];
4877         aj     = buf_rj[k] + *(nextai[k]);
4878         aa     = abuf_r[k] + *(nextai[k]);
4879         nextaj = 0;
4880         for (j=0; nextaj<anzi; j++) {
4881           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4882             ba_i[j] += aa[nextaj++];
4883           }
4884         }
4885         nextrow[k]++; nextai[k]++;
4886       }
4887     }
4888     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4889   }
4890   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4891   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4892 
4893   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4894   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4895   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4896   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4897   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4898   PetscFunctionReturn(0);
4899 }
4900 
4901 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4905 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4906 {
4907   PetscErrorCode      ierr;
4908   Mat                 B_mpi;
4909   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4910   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4911   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4912   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4913   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4914   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4915   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4916   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4917   MPI_Status          *status;
4918   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4919   PetscBT             lnkbt;
4920   Mat_Merge_SeqsToMPI *merge;
4921   PetscContainer      container;
4922 
4923   PetscFunctionBegin;
4924   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4925 
4926   /* make sure it is a PETSc comm */
4927   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4929   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4930 
4931   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4932   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4933 
4934   /* determine row ownership */
4935   /*---------------------------------------------------------*/
4936   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4937   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4938   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4939   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4940   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4941   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4942   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4943 
4944   m      = merge->rowmap->n;
4945   owners = merge->rowmap->range;
4946 
4947   /* determine the number of messages to send, their lengths */
4948   /*---------------------------------------------------------*/
4949   len_s = merge->len_s;
4950 
4951   len          = 0; /* length of buf_si[] */
4952   merge->nsend = 0;
4953   for (proc=0; proc<size; proc++) {
4954     len_si[proc] = 0;
4955     if (proc == rank) {
4956       len_s[proc] = 0;
4957     } else {
4958       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4959       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4960     }
4961     if (len_s[proc]) {
4962       merge->nsend++;
4963       nrows = 0;
4964       for (i=owners[proc]; i<owners[proc+1]; i++) {
4965         if (ai[i+1] > ai[i]) nrows++;
4966       }
4967       len_si[proc] = 2*(nrows+1);
4968       len         += len_si[proc];
4969     }
4970   }
4971 
4972   /* determine the number and length of messages to receive for ij-structure */
4973   /*-------------------------------------------------------------------------*/
4974   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4975   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4976 
4977   /* post the Irecv of j-structure */
4978   /*-------------------------------*/
4979   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4980   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4981 
4982   /* post the Isend of j-structure */
4983   /*--------------------------------*/
4984   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4985 
4986   for (proc=0, k=0; proc<size; proc++) {
4987     if (!len_s[proc]) continue;
4988     i    = owners[proc];
4989     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4990     k++;
4991   }
4992 
4993   /* receives and sends of j-structure are complete */
4994   /*------------------------------------------------*/
4995   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4996   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4997 
4998   /* send and recv i-structure */
4999   /*---------------------------*/
5000   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
5001   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
5002 
5003   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
5004   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
5005   for (proc=0,k=0; proc<size; proc++) {
5006     if (!len_s[proc]) continue;
5007     /* form outgoing message for i-structure:
5008          buf_si[0]:                 nrows to be sent
5009                [1:nrows]:           row index (global)
5010                [nrows+1:2*nrows+1]: i-structure index
5011     */
5012     /*-------------------------------------------*/
5013     nrows       = len_si[proc]/2 - 1;
5014     buf_si_i    = buf_si + nrows+1;
5015     buf_si[0]   = nrows;
5016     buf_si_i[0] = 0;
5017     nrows       = 0;
5018     for (i=owners[proc]; i<owners[proc+1]; i++) {
5019       anzi = ai[i+1] - ai[i];
5020       if (anzi) {
5021         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
5022         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
5023         nrows++;
5024       }
5025     }
5026     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
5027     k++;
5028     buf_si += len_si[proc];
5029   }
5030 
5031   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
5032   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
5033 
5034   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
5035   for (i=0; i<merge->nrecv; i++) {
5036     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
5037   }
5038 
5039   ierr = PetscFree(len_si);CHKERRQ(ierr);
5040   ierr = PetscFree(len_ri);CHKERRQ(ierr);
5041   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
5042   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
5043   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
5044   ierr = PetscFree(buf_s);CHKERRQ(ierr);
5045   ierr = PetscFree(status);CHKERRQ(ierr);
5046 
5047   /* compute a local seq matrix in each processor */
5048   /*----------------------------------------------*/
5049   /* allocate bi array and free space for accumulating nonzero column info */
5050   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
5051   bi[0] = 0;
5052 
5053   /* create and initialize a linked list */
5054   nlnk = N+1;
5055   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5056 
5057   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5058   len  = ai[owners[rank+1]] - ai[owners[rank]];
5059   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5060 
5061   current_space = free_space;
5062 
5063   /* determine symbolic info for each local row */
5064   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5065 
5066   for (k=0; k<merge->nrecv; k++) {
5067     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5068     nrows       = *buf_ri_k[k];
5069     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5070     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5071   }
5072 
5073   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5074   len  = 0;
5075   for (i=0; i<m; i++) {
5076     bnzi = 0;
5077     /* add local non-zero cols of this proc's seqmat into lnk */
5078     arow  = owners[rank] + i;
5079     anzi  = ai[arow+1] - ai[arow];
5080     aj    = a->j + ai[arow];
5081     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5082     bnzi += nlnk;
5083     /* add received col data into lnk */
5084     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5085       if (i == *nextrow[k]) { /* i-th row */
5086         anzi  = *(nextai[k]+1) - *nextai[k];
5087         aj    = buf_rj[k] + *nextai[k];
5088         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5089         bnzi += nlnk;
5090         nextrow[k]++; nextai[k]++;
5091       }
5092     }
5093     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5094 
5095     /* if free space is not available, make more free space */
5096     if (current_space->local_remaining<bnzi) {
5097       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5098       nspacedouble++;
5099     }
5100     /* copy data into free space, then initialize lnk */
5101     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5102     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5103 
5104     current_space->array           += bnzi;
5105     current_space->local_used      += bnzi;
5106     current_space->local_remaining -= bnzi;
5107 
5108     bi[i+1] = bi[i] + bnzi;
5109   }
5110 
5111   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5112 
5113   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5114   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5115   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5116 
5117   /* create symbolic parallel matrix B_mpi */
5118   /*---------------------------------------*/
5119   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5120   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5121   if (n==PETSC_DECIDE) {
5122     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5123   } else {
5124     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5125   }
5126   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5127   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5128   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5129   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5130   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5131 
5132   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5133   B_mpi->assembled    = PETSC_FALSE;
5134   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5135   merge->bi           = bi;
5136   merge->bj           = bj;
5137   merge->buf_ri       = buf_ri;
5138   merge->buf_rj       = buf_rj;
5139   merge->coi          = NULL;
5140   merge->coj          = NULL;
5141   merge->owners_co    = NULL;
5142 
5143   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5144 
5145   /* attach the supporting struct to B_mpi for reuse */
5146   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5147   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5148   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5149   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5150   *mpimat = B_mpi;
5151 
5152   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5153   PetscFunctionReturn(0);
5154 }
5155 
5156 #undef __FUNCT__
5157 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5158 /*@C
5159       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5160                  matrices from each processor
5161 
5162     Collective on MPI_Comm
5163 
5164    Input Parameters:
5165 +    comm - the communicators the parallel matrix will live on
5166 .    seqmat - the input sequential matrices
5167 .    m - number of local rows (or PETSC_DECIDE)
5168 .    n - number of local columns (or PETSC_DECIDE)
5169 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5170 
5171    Output Parameter:
5172 .    mpimat - the parallel matrix generated
5173 
5174     Level: advanced
5175 
5176    Notes:
5177      The dimensions of the sequential matrix in each processor MUST be the same.
5178      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5179      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5180 @*/
5181 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5182 {
5183   PetscErrorCode ierr;
5184   PetscMPIInt    size;
5185 
5186   PetscFunctionBegin;
5187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5188   if (size == 1) {
5189     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5190     if (scall == MAT_INITIAL_MATRIX) {
5191       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5192     } else {
5193       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5194     }
5195     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5196     PetscFunctionReturn(0);
5197   }
5198   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5199   if (scall == MAT_INITIAL_MATRIX) {
5200     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5201   }
5202   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5203   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 #undef __FUNCT__
5208 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5209 /*@
5210      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5211           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5212           with MatGetSize()
5213 
5214     Not Collective
5215 
5216    Input Parameters:
5217 +    A - the matrix
5218 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5219 
5220    Output Parameter:
5221 .    A_loc - the local sequential matrix generated
5222 
5223     Level: developer
5224 
5225 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5226 
5227 @*/
5228 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5229 {
5230   PetscErrorCode ierr;
5231   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5232   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5233   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5234   MatScalar      *aa=a->a,*ba=b->a,*cam;
5235   PetscScalar    *ca;
5236   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5237   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5238   PetscBool      match;
5239 
5240   PetscFunctionBegin;
5241   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5242   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5243   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5244   if (scall == MAT_INITIAL_MATRIX) {
5245     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5246     ci[0] = 0;
5247     for (i=0; i<am; i++) {
5248       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5249     }
5250     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5251     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5252     k    = 0;
5253     for (i=0; i<am; i++) {
5254       ncols_o = bi[i+1] - bi[i];
5255       ncols_d = ai[i+1] - ai[i];
5256       /* off-diagonal portion of A */
5257       for (jo=0; jo<ncols_o; jo++) {
5258         col = cmap[*bj];
5259         if (col >= cstart) break;
5260         cj[k]   = col; bj++;
5261         ca[k++] = *ba++;
5262       }
5263       /* diagonal portion of A */
5264       for (j=0; j<ncols_d; j++) {
5265         cj[k]   = cstart + *aj++;
5266         ca[k++] = *aa++;
5267       }
5268       /* off-diagonal portion of A */
5269       for (j=jo; j<ncols_o; j++) {
5270         cj[k]   = cmap[*bj++];
5271         ca[k++] = *ba++;
5272       }
5273     }
5274     /* put together the new matrix */
5275     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5276     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5277     /* Since these are PETSc arrays, change flags to free them as necessary. */
5278     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5279     mat->free_a  = PETSC_TRUE;
5280     mat->free_ij = PETSC_TRUE;
5281     mat->nonew   = 0;
5282   } else if (scall == MAT_REUSE_MATRIX) {
5283     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5284     ci = mat->i; cj = mat->j; cam = mat->a;
5285     for (i=0; i<am; i++) {
5286       /* off-diagonal portion of A */
5287       ncols_o = bi[i+1] - bi[i];
5288       for (jo=0; jo<ncols_o; jo++) {
5289         col = cmap[*bj];
5290         if (col >= cstart) break;
5291         *cam++ = *ba++; bj++;
5292       }
5293       /* diagonal portion of A */
5294       ncols_d = ai[i+1] - ai[i];
5295       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5296       /* off-diagonal portion of A */
5297       for (j=jo; j<ncols_o; j++) {
5298         *cam++ = *ba++; bj++;
5299       }
5300     }
5301   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5302   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5303   PetscFunctionReturn(0);
5304 }
5305 
5306 #undef __FUNCT__
5307 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5308 /*@C
5309      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5310 
5311     Not Collective
5312 
5313    Input Parameters:
5314 +    A - the matrix
5315 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5316 -    row, col - index sets of rows and columns to extract (or NULL)
5317 
5318    Output Parameter:
5319 .    A_loc - the local sequential matrix generated
5320 
5321     Level: developer
5322 
5323 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5324 
5325 @*/
5326 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5327 {
5328   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5329   PetscErrorCode ierr;
5330   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5331   IS             isrowa,iscola;
5332   Mat            *aloc;
5333   PetscBool      match;
5334 
5335   PetscFunctionBegin;
5336   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5337   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5338   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5339   if (!row) {
5340     start = A->rmap->rstart; end = A->rmap->rend;
5341     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5342   } else {
5343     isrowa = *row;
5344   }
5345   if (!col) {
5346     start = A->cmap->rstart;
5347     cmap  = a->garray;
5348     nzA   = a->A->cmap->n;
5349     nzB   = a->B->cmap->n;
5350     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5351     ncols = 0;
5352     for (i=0; i<nzB; i++) {
5353       if (cmap[i] < start) idx[ncols++] = cmap[i];
5354       else break;
5355     }
5356     imark = i;
5357     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5358     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5359     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5360   } else {
5361     iscola = *col;
5362   }
5363   if (scall != MAT_INITIAL_MATRIX) {
5364     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5365     aloc[0] = *A_loc;
5366   }
5367   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5368   *A_loc = aloc[0];
5369   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5370   if (!row) {
5371     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5372   }
5373   if (!col) {
5374     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5375   }
5376   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5377   PetscFunctionReturn(0);
5378 }
5379 
5380 #undef __FUNCT__
5381 #define __FUNCT__ "MatGetBrowsOfAcols"
5382 /*@C
5383     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5384 
5385     Collective on Mat
5386 
5387    Input Parameters:
5388 +    A,B - the matrices in mpiaij format
5389 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5390 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5391 
5392    Output Parameter:
5393 +    rowb, colb - index sets of rows and columns of B to extract
5394 -    B_seq - the sequential matrix generated
5395 
5396     Level: developer
5397 
5398 @*/
5399 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5400 {
5401   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5402   PetscErrorCode ierr;
5403   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5404   IS             isrowb,iscolb;
5405   Mat            *bseq=NULL;
5406 
5407   PetscFunctionBegin;
5408   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5409     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5410   }
5411   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5412 
5413   if (scall == MAT_INITIAL_MATRIX) {
5414     start = A->cmap->rstart;
5415     cmap  = a->garray;
5416     nzA   = a->A->cmap->n;
5417     nzB   = a->B->cmap->n;
5418     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5419     ncols = 0;
5420     for (i=0; i<nzB; i++) {  /* row < local row index */
5421       if (cmap[i] < start) idx[ncols++] = cmap[i];
5422       else break;
5423     }
5424     imark = i;
5425     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5426     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5427     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5428     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5429   } else {
5430     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5431     isrowb  = *rowb; iscolb = *colb;
5432     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5433     bseq[0] = *B_seq;
5434   }
5435   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5436   *B_seq = bseq[0];
5437   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5438   if (!rowb) {
5439     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5440   } else {
5441     *rowb = isrowb;
5442   }
5443   if (!colb) {
5444     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5445   } else {
5446     *colb = iscolb;
5447   }
5448   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5449   PetscFunctionReturn(0);
5450 }
5451 
5452 #undef __FUNCT__
5453 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5454 /*
5455     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5456     of the OFF-DIAGONAL portion of local A
5457 
5458     Collective on Mat
5459 
5460    Input Parameters:
5461 +    A,B - the matrices in mpiaij format
5462 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5463 
5464    Output Parameter:
5465 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5466 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5467 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5468 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5469 
5470     Level: developer
5471 
5472 */
5473 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5474 {
5475   VecScatter_MPI_General *gen_to,*gen_from;
5476   PetscErrorCode         ierr;
5477   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5478   Mat_SeqAIJ             *b_oth;
5479   VecScatter             ctx =a->Mvctx;
5480   MPI_Comm               comm;
5481   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5482   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5483   PetscScalar            *rvalues,*svalues;
5484   MatScalar              *b_otha,*bufa,*bufA;
5485   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5486   MPI_Request            *rwaits = NULL,*swaits = NULL;
5487   MPI_Status             *sstatus,rstatus;
5488   PetscMPIInt            jj;
5489   PetscInt               *cols,sbs,rbs;
5490   PetscScalar            *vals;
5491 
5492   PetscFunctionBegin;
5493   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5494   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5495     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5496   }
5497   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5498   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5499 
5500   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5501   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5502   rvalues  = gen_from->values; /* holds the length of receiving row */
5503   svalues  = gen_to->values;   /* holds the length of sending row */
5504   nrecvs   = gen_from->n;
5505   nsends   = gen_to->n;
5506 
5507   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5508   srow    = gen_to->indices;    /* local row index to be sent */
5509   sstarts = gen_to->starts;
5510   sprocs  = gen_to->procs;
5511   sstatus = gen_to->sstatus;
5512   sbs     = gen_to->bs;
5513   rstarts = gen_from->starts;
5514   rprocs  = gen_from->procs;
5515   rbs     = gen_from->bs;
5516 
5517   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5518   if (scall == MAT_INITIAL_MATRIX) {
5519     /* i-array */
5520     /*---------*/
5521     /*  post receives */
5522     for (i=0; i<nrecvs; i++) {
5523       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5524       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5525       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5526     }
5527 
5528     /* pack the outgoing message */
5529     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5530 
5531     sstartsj[0] = 0;
5532     rstartsj[0] = 0;
5533     len         = 0; /* total length of j or a array to be sent */
5534     k           = 0;
5535     for (i=0; i<nsends; i++) {
5536       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5537       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5538       for (j=0; j<nrows; j++) {
5539         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5540         for (l=0; l<sbs; l++) {
5541           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5542 
5543           rowlen[j*sbs+l] = ncols;
5544 
5545           len += ncols;
5546           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5547         }
5548         k++;
5549       }
5550       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5551 
5552       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5553     }
5554     /* recvs and sends of i-array are completed */
5555     i = nrecvs;
5556     while (i--) {
5557       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5558     }
5559     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5560 
5561     /* allocate buffers for sending j and a arrays */
5562     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5563     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5564 
5565     /* create i-array of B_oth */
5566     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5567 
5568     b_othi[0] = 0;
5569     len       = 0; /* total length of j or a array to be received */
5570     k         = 0;
5571     for (i=0; i<nrecvs; i++) {
5572       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5573       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5574       for (j=0; j<nrows; j++) {
5575         b_othi[k+1] = b_othi[k] + rowlen[j];
5576         len        += rowlen[j]; k++;
5577       }
5578       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5579     }
5580 
5581     /* allocate space for j and a arrrays of B_oth */
5582     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5583     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5584 
5585     /* j-array */
5586     /*---------*/
5587     /*  post receives of j-array */
5588     for (i=0; i<nrecvs; i++) {
5589       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5590       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5591     }
5592 
5593     /* pack the outgoing message j-array */
5594     k = 0;
5595     for (i=0; i<nsends; i++) {
5596       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5597       bufJ  = bufj+sstartsj[i];
5598       for (j=0; j<nrows; j++) {
5599         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5600         for (ll=0; ll<sbs; ll++) {
5601           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5602           for (l=0; l<ncols; l++) {
5603             *bufJ++ = cols[l];
5604           }
5605           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5606         }
5607       }
5608       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5609     }
5610 
5611     /* recvs and sends of j-array are completed */
5612     i = nrecvs;
5613     while (i--) {
5614       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5615     }
5616     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5617   } else if (scall == MAT_REUSE_MATRIX) {
5618     sstartsj = *startsj_s;
5619     rstartsj = *startsj_r;
5620     bufa     = *bufa_ptr;
5621     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5622     b_otha   = b_oth->a;
5623   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5624 
5625   /* a-array */
5626   /*---------*/
5627   /*  post receives of a-array */
5628   for (i=0; i<nrecvs; i++) {
5629     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5630     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5631   }
5632 
5633   /* pack the outgoing message a-array */
5634   k = 0;
5635   for (i=0; i<nsends; i++) {
5636     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5637     bufA  = bufa+sstartsj[i];
5638     for (j=0; j<nrows; j++) {
5639       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5640       for (ll=0; ll<sbs; ll++) {
5641         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5642         for (l=0; l<ncols; l++) {
5643           *bufA++ = vals[l];
5644         }
5645         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5646       }
5647     }
5648     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5649   }
5650   /* recvs and sends of a-array are completed */
5651   i = nrecvs;
5652   while (i--) {
5653     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5654   }
5655   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5656   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5657 
5658   if (scall == MAT_INITIAL_MATRIX) {
5659     /* put together the new matrix */
5660     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5661 
5662     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5663     /* Since these are PETSc arrays, change flags to free them as necessary. */
5664     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5665     b_oth->free_a  = PETSC_TRUE;
5666     b_oth->free_ij = PETSC_TRUE;
5667     b_oth->nonew   = 0;
5668 
5669     ierr = PetscFree(bufj);CHKERRQ(ierr);
5670     if (!startsj_s || !bufa_ptr) {
5671       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5672       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5673     } else {
5674       *startsj_s = sstartsj;
5675       *startsj_r = rstartsj;
5676       *bufa_ptr  = bufa;
5677     }
5678   }
5679   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5680   PetscFunctionReturn(0);
5681 }
5682 
5683 #undef __FUNCT__
5684 #define __FUNCT__ "MatGetCommunicationStructs"
5685 /*@C
5686   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5687 
5688   Not Collective
5689 
5690   Input Parameters:
5691 . A - The matrix in mpiaij format
5692 
5693   Output Parameter:
5694 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5695 . colmap - A map from global column index to local index into lvec
5696 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5697 
5698   Level: developer
5699 
5700 @*/
5701 #if defined(PETSC_USE_CTABLE)
5702 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5703 #else
5704 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5705 #endif
5706 {
5707   Mat_MPIAIJ *a;
5708 
5709   PetscFunctionBegin;
5710   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5711   PetscValidPointer(lvec, 2);
5712   PetscValidPointer(colmap, 3);
5713   PetscValidPointer(multScatter, 4);
5714   a = (Mat_MPIAIJ*) A->data;
5715   if (lvec) *lvec = a->lvec;
5716   if (colmap) *colmap = a->colmap;
5717   if (multScatter) *multScatter = a->Mvctx;
5718   PetscFunctionReturn(0);
5719 }
5720 
5721 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5722 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5723 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5724 
5725 #undef __FUNCT__
5726 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5727 /*
5728     Computes (B'*A')' since computing B*A directly is untenable
5729 
5730                n                       p                          p
5731         (              )       (              )         (                  )
5732       m (      A       )  *  n (       B      )   =   m (         C        )
5733         (              )       (              )         (                  )
5734 
5735 */
5736 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5737 {
5738   PetscErrorCode ierr;
5739   Mat            At,Bt,Ct;
5740 
5741   PetscFunctionBegin;
5742   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5743   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5744   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5745   ierr = MatDestroy(&At);CHKERRQ(ierr);
5746   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5747   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5748   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5749   PetscFunctionReturn(0);
5750 }
5751 
5752 #undef __FUNCT__
5753 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5754 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5755 {
5756   PetscErrorCode ierr;
5757   PetscInt       m=A->rmap->n,n=B->cmap->n;
5758   Mat            Cmat;
5759 
5760   PetscFunctionBegin;
5761   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5762   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5763   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5764   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5765   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5766   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5767   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5768   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5769 
5770   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5771 
5772   *C = Cmat;
5773   PetscFunctionReturn(0);
5774 }
5775 
5776 /* ----------------------------------------------------------------*/
5777 #undef __FUNCT__
5778 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5779 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5780 {
5781   PetscErrorCode ierr;
5782 
5783   PetscFunctionBegin;
5784   if (scall == MAT_INITIAL_MATRIX) {
5785     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5786     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5787     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5788   }
5789   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5790   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5791   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5792   PetscFunctionReturn(0);
5793 }
5794 
5795 #if defined(PETSC_HAVE_MUMPS)
5796 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5797 #endif
5798 #if defined(PETSC_HAVE_PASTIX)
5799 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5800 #endif
5801 #if defined(PETSC_HAVE_SUPERLU_DIST)
5802 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5803 #endif
5804 #if defined(PETSC_HAVE_CLIQUE)
5805 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5806 #endif
5807 
5808 /*MC
5809    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5810 
5811    Options Database Keys:
5812 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5813 
5814   Level: beginner
5815 
5816 .seealso: MatCreateAIJ()
5817 M*/
5818 
5819 #undef __FUNCT__
5820 #define __FUNCT__ "MatCreate_MPIAIJ"
5821 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5822 {
5823   Mat_MPIAIJ     *b;
5824   PetscErrorCode ierr;
5825   PetscMPIInt    size;
5826 
5827   PetscFunctionBegin;
5828   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5829 
5830   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5831   B->data       = (void*)b;
5832   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5833   B->assembled  = PETSC_FALSE;
5834   B->insertmode = NOT_SET_VALUES;
5835   b->size       = size;
5836 
5837   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5838 
5839   /* build cache for off array entries formed */
5840   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5841 
5842   b->donotstash  = PETSC_FALSE;
5843   b->colmap      = 0;
5844   b->garray      = 0;
5845   b->roworiented = PETSC_TRUE;
5846 
5847   /* stuff used for matrix vector multiply */
5848   b->lvec  = NULL;
5849   b->Mvctx = NULL;
5850 
5851   /* stuff for MatGetRow() */
5852   b->rowindices   = 0;
5853   b->rowvalues    = 0;
5854   b->getrowactive = PETSC_FALSE;
5855 
5856   /* flexible pointer used in CUSP/CUSPARSE classes */
5857   b->spptr = NULL;
5858 
5859 #if defined(PETSC_HAVE_MUMPS)
5860   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5861 #endif
5862 #if defined(PETSC_HAVE_PASTIX)
5863   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5864 #endif
5865 #if defined(PETSC_HAVE_SUPERLU_DIST)
5866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5867 #endif
5868 #if defined(PETSC_HAVE_CLIQUE)
5869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5870 #endif
5871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5884   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5885   PetscFunctionReturn(0);
5886 }
5887 
5888 #undef __FUNCT__
5889 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5890 /*@
5891      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5892          and "off-diagonal" part of the matrix in CSR format.
5893 
5894    Collective on MPI_Comm
5895 
5896    Input Parameters:
5897 +  comm - MPI communicator
5898 .  m - number of local rows (Cannot be PETSC_DECIDE)
5899 .  n - This value should be the same as the local size used in creating the
5900        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5901        calculated if N is given) For square matrices n is almost always m.
5902 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5903 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5904 .   i - row indices for "diagonal" portion of matrix
5905 .   j - column indices
5906 .   a - matrix values
5907 .   oi - row indices for "off-diagonal" portion of matrix
5908 .   oj - column indices
5909 -   oa - matrix values
5910 
5911    Output Parameter:
5912 .   mat - the matrix
5913 
5914    Level: advanced
5915 
5916    Notes:
5917        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5918        must free the arrays once the matrix has been destroyed and not before.
5919 
5920        The i and j indices are 0 based
5921 
5922        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5923 
5924        This sets local rows and cannot be used to set off-processor values.
5925 
5926        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5927        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5928        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5929        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5930        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5931        communication if it is known that only local entries will be set.
5932 
5933 .keywords: matrix, aij, compressed row, sparse, parallel
5934 
5935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5936           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5937 @*/
5938 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5939 {
5940   PetscErrorCode ierr;
5941   Mat_MPIAIJ     *maij;
5942 
5943   PetscFunctionBegin;
5944   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5945   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5946   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5947   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5948   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5949   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5950   maij = (Mat_MPIAIJ*) (*mat)->data;
5951 
5952   (*mat)->preallocated = PETSC_TRUE;
5953 
5954   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5955   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5956 
5957   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5958   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5959 
5960   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5961   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5962   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5963   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5964 
5965   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5966   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5967   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5968   PetscFunctionReturn(0);
5969 }
5970 
5971 /*
5972     Special version for direct calls from Fortran
5973 */
5974 #include <petsc-private/fortranimpl.h>
5975 
5976 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5977 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5978 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5979 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5980 #endif
5981 
5982 /* Change these macros so can be used in void function */
5983 #undef CHKERRQ
5984 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5985 #undef SETERRQ2
5986 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5987 #undef SETERRQ3
5988 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5989 #undef SETERRQ
5990 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5991 
5992 #undef __FUNCT__
5993 #define __FUNCT__ "matsetvaluesmpiaij_"
5994 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5995 {
5996   Mat            mat  = *mmat;
5997   PetscInt       m    = *mm, n = *mn;
5998   InsertMode     addv = *maddv;
5999   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6000   PetscScalar    value;
6001   PetscErrorCode ierr;
6002 
6003   MatCheckPreallocated(mat,1);
6004   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6005 
6006 #if defined(PETSC_USE_DEBUG)
6007   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6008 #endif
6009   {
6010     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6011     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6012     PetscBool roworiented = aij->roworiented;
6013 
6014     /* Some Variables required in the macro */
6015     Mat        A                 = aij->A;
6016     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6017     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6018     MatScalar  *aa               = a->a;
6019     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6020     Mat        B                 = aij->B;
6021     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6022     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6023     MatScalar  *ba               = b->a;
6024 
6025     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6026     PetscInt  nonew = a->nonew;
6027     MatScalar *ap1,*ap2;
6028 
6029     PetscFunctionBegin;
6030     for (i=0; i<m; i++) {
6031       if (im[i] < 0) continue;
6032 #if defined(PETSC_USE_DEBUG)
6033       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6034 #endif
6035       if (im[i] >= rstart && im[i] < rend) {
6036         row      = im[i] - rstart;
6037         lastcol1 = -1;
6038         rp1      = aj + ai[row];
6039         ap1      = aa + ai[row];
6040         rmax1    = aimax[row];
6041         nrow1    = ailen[row];
6042         low1     = 0;
6043         high1    = nrow1;
6044         lastcol2 = -1;
6045         rp2      = bj + bi[row];
6046         ap2      = ba + bi[row];
6047         rmax2    = bimax[row];
6048         nrow2    = bilen[row];
6049         low2     = 0;
6050         high2    = nrow2;
6051 
6052         for (j=0; j<n; j++) {
6053           if (roworiented) value = v[i*n+j];
6054           else value = v[i+j*m];
6055           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6056           if (in[j] >= cstart && in[j] < cend) {
6057             col = in[j] - cstart;
6058             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6059           } else if (in[j] < 0) continue;
6060 #if defined(PETSC_USE_DEBUG)
6061           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6062 #endif
6063           else {
6064             if (mat->was_assembled) {
6065               if (!aij->colmap) {
6066                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6067               }
6068 #if defined(PETSC_USE_CTABLE)
6069               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6070               col--;
6071 #else
6072               col = aij->colmap[in[j]] - 1;
6073 #endif
6074               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6075                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6076                 col  =  in[j];
6077                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6078                 B     = aij->B;
6079                 b     = (Mat_SeqAIJ*)B->data;
6080                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6081                 rp2   = bj + bi[row];
6082                 ap2   = ba + bi[row];
6083                 rmax2 = bimax[row];
6084                 nrow2 = bilen[row];
6085                 low2  = 0;
6086                 high2 = nrow2;
6087                 bm    = aij->B->rmap->n;
6088                 ba    = b->a;
6089               }
6090             } else col = in[j];
6091             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6092           }
6093         }
6094       } else if (!aij->donotstash) {
6095         if (roworiented) {
6096           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6097         } else {
6098           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6099         }
6100       }
6101     }
6102   }
6103   PetscFunctionReturnVoid();
6104 }
6105 
6106