xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2e7729f58b5451bb0bf4b49a70efaee73a32b7c6)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine number diagonal and off-diagonal counts */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine number diagonal and off-diagonal counts */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it it is not scalable (each processor
331 has an order N integer array but is fast to acess.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselfs, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added in yy until the next line, */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* )th processor needs space a largest processor needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     MatCheckPreallocated(A,1);
1801     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1802     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_ROW_ORIENTED:
1805     a->roworiented = flg;
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_NEW_DIAGONALS:
1810     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1811     break;
1812   case MAT_IGNORE_OFF_PROC_ENTRIES:
1813     a->donotstash = flg;
1814     break;
1815   case MAT_SPD:
1816     A->spd_set                         = PETSC_TRUE;
1817     A->spd                             = flg;
1818     if (flg) {
1819       A->symmetric                     = PETSC_TRUE;
1820       A->structurally_symmetric        = PETSC_TRUE;
1821       A->symmetric_set                 = PETSC_TRUE;
1822       A->structurally_symmetric_set    = PETSC_TRUE;
1823     }
1824     break;
1825   case MAT_SYMMETRIC:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_HERMITIAN:
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_SYMMETRY_ETERNAL:
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatGetRow_MPIAIJ"
1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1848   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1849   PetscErrorCode ierr;
1850   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1851   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1852   PetscInt       *cmap,*idx_p;
1853 
1854   PetscFunctionBegin;
1855   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1856   mat->getrowactive = PETSC_TRUE;
1857 
1858   if (!mat->rowvalues && (idx || v)) {
1859     /*
1860         allocate enough space to hold information from the longest row.
1861     */
1862     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1863     PetscInt   max = 1,tmp;
1864     for (i=0; i<matin->rmap->n; i++) {
1865       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1866       if (max < tmp) { max = tmp; }
1867     }
1868     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1869   }
1870 
1871   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1872   lrow = row - rstart;
1873 
1874   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1875   if (!v)   {pvA = 0; pvB = 0;}
1876   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1877   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   nztot = nzA + nzB;
1880 
1881   cmap  = mat->garray;
1882   if (v  || idx) {
1883     if (nztot) {
1884       /* Sort by increasing column numbers, assuming A and B already sorted */
1885       PetscInt imark = -1;
1886       if (v) {
1887         *v = v_p = mat->rowvalues;
1888         for (i=0; i<nzB; i++) {
1889           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1890           else break;
1891         }
1892         imark = i;
1893         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1894         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1895       }
1896       if (idx) {
1897         *idx = idx_p = mat->rowindices;
1898         if (imark > -1) {
1899           for (i=0; i<imark; i++) {
1900             idx_p[i] = cmap[cworkB[i]];
1901           }
1902         } else {
1903           for (i=0; i<nzB; i++) {
1904             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1905             else break;
1906           }
1907           imark = i;
1908         }
1909         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1910         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1911       }
1912     } else {
1913       if (idx) *idx = 0;
1914       if (v)   *v   = 0;
1915     }
1916   }
1917   *nz = nztot;
1918   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1919   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1925 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1926 {
1927   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1928 
1929   PetscFunctionBegin;
1930   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1931   aij->getrowactive = PETSC_FALSE;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatNorm_MPIAIJ"
1937 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1938 {
1939   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1940   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1941   PetscErrorCode ierr;
1942   PetscInt       i,j,cstart = mat->cmap->rstart;
1943   PetscReal      sum = 0.0;
1944   MatScalar      *v;
1945 
1946   PetscFunctionBegin;
1947   if (aij->size == 1) {
1948     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1949   } else {
1950     if (type == NORM_FROBENIUS) {
1951       v = amat->a;
1952       for (i=0; i<amat->nz; i++) {
1953 #if defined(PETSC_USE_COMPLEX)
1954         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1955 #else
1956         sum += (*v)*(*v); v++;
1957 #endif
1958       }
1959       v = bmat->a;
1960       for (i=0; i<bmat->nz; i++) {
1961 #if defined(PETSC_USE_COMPLEX)
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963 #else
1964         sum += (*v)*(*v); v++;
1965 #endif
1966       }
1967       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1968       *norm = PetscSqrtReal(*norm);
1969     } else if (type == NORM_1) { /* max column norm */
1970       PetscReal *tmp,*tmp2;
1971       PetscInt  *jj,*garray = aij->garray;
1972       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1973       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1974       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1975       *norm = 0.0;
1976       v = amat->a; jj = amat->j;
1977       for (j=0; j<amat->nz; j++) {
1978         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1979       }
1980       v = bmat->a; jj = bmat->j;
1981       for (j=0; j<bmat->nz; j++) {
1982         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1983       }
1984       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1985       for (j=0; j<mat->cmap->N; j++) {
1986         if (tmp2[j] > *norm) *norm = tmp2[j];
1987       }
1988       ierr = PetscFree(tmp);CHKERRQ(ierr);
1989       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1990     } else if (type == NORM_INFINITY) { /* max row norm */
1991       PetscReal ntemp = 0.0;
1992       for (j=0; j<aij->A->rmap->n; j++) {
1993         v = amat->a + amat->i[j];
1994         sum = 0.0;
1995         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         v = bmat->a + bmat->i[j];
1999         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         if (sum > ntemp) ntemp = sum;
2003       }
2004       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2005     } else {
2006       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2007     }
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatTranspose_MPIAIJ"
2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2015 {
2016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2017   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2018   PetscErrorCode ierr;
2019   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2020   PetscInt       cstart=A->cmap->rstart,ncol;
2021   Mat            B;
2022   MatScalar      *array;
2023 
2024   PetscFunctionBegin;
2025   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2026 
2027   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2028   ai = Aloc->i; aj = Aloc->j;
2029   bi = Bloc->i; bj = Bloc->j;
2030   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2031     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2032     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2033     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2034     for (i=0; i<ai[ma]; i++){
2035       d_nnz[aj[i]] ++;
2036       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2037     }
2038 
2039     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2040     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2041     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2042     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2043     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2044     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2045     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2046   } else {
2047     B = *matout;
2048     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2049     for (i=0; i<ai[ma]; i++){
2050       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2051     }
2052   }
2053 
2054   /* copy over the A part */
2055   array = Aloc->a;
2056   row = A->rmap->rstart;
2057   for (i=0; i<ma; i++) {
2058     ncol = ai[i+1]-ai[i];
2059     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2060     row++; array += ncol; aj += ncol;
2061   }
2062   aj = Aloc->j;
2063   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2064 
2065   /* copy over the B part */
2066   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2067   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2068   array = Bloc->a;
2069   row = A->rmap->rstart;
2070   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2071   cols_tmp = cols;
2072   for (i=0; i<mb; i++) {
2073     ncol = bi[i+1]-bi[i];
2074     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2075     row++; array += ncol; cols_tmp += ncol;
2076   }
2077   ierr = PetscFree(cols);CHKERRQ(ierr);
2078 
2079   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2082     *matout = B;
2083   } else {
2084     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2092 {
2093   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2094   Mat            a = aij->A,b = aij->B;
2095   PetscErrorCode ierr;
2096   PetscInt       s1,s2,s3;
2097 
2098   PetscFunctionBegin;
2099   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2100   if (rr) {
2101     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2102     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2103     /* Overlap communication with computation. */
2104     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   }
2106   if (ll) {
2107     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2108     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2109     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2110   }
2111   /* scale  the diagonal block */
2112   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2113 
2114   if (rr) {
2115     /* Do a scatter end and then right scale the off-diagonal block */
2116     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2117     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2118   }
2119 
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2125 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2126 {
2127   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatEqual_MPIAIJ"
2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2138 {
2139   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2140   Mat            a,b,c,d;
2141   PetscBool      flg;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   a = matA->A; b = matA->B;
2146   c = matB->A; d = matB->B;
2147 
2148   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2149   if (flg) {
2150     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2151   }
2152   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatCopy_MPIAIJ"
2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2159 {
2160   PetscErrorCode ierr;
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2162   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2163 
2164   PetscFunctionBegin;
2165   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2166   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2167     /* because of the column compression in the off-processor part of the matrix a->B,
2168        the number of columns in a->B and b->B may be different, hence we cannot call
2169        the MatCopy() directly on the two parts. If need be, we can provide a more
2170        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2171        then copying the submatrices */
2172     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2173   } else {
2174     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2175     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatSetUp_MPIAIJ"
2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2183 {
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2193 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2194 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2195 {
2196   PetscInt          i,m=Y->rmap->N;
2197   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2198   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2199   const PetscInt    *xi = x->i,*yi = y->i;
2200 
2201   PetscFunctionBegin;
2202   /* Set the number of nonzeros in the new matrix */
2203   for(i=0; i<m; i++) {
2204     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2205     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2206     nnz[i] = 0;
2207     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2208       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2209       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2210       nnz[i]++;
2211     }
2212     for (; k<nzy; k++) nnz[i]++;
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 #undef __FUNCT__
2218 #define __FUNCT__ "MatAXPY_MPIAIJ"
2219 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2220 {
2221   PetscErrorCode ierr;
2222   PetscInt       i;
2223   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2224   PetscBLASInt   bnz,one=1;
2225   Mat_SeqAIJ     *x,*y;
2226 
2227   PetscFunctionBegin;
2228   if (str == SAME_NONZERO_PATTERN) {
2229     PetscScalar alpha = a;
2230     x = (Mat_SeqAIJ *)xx->A->data;
2231     y = (Mat_SeqAIJ *)yy->A->data;
2232     bnz = PetscBLASIntCast(x->nz);
2233     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2234     x = (Mat_SeqAIJ *)xx->B->data;
2235     y = (Mat_SeqAIJ *)yy->B->data;
2236     bnz = PetscBLASIntCast(x->nz);
2237     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2238   } else if (str == SUBSET_NONZERO_PATTERN) {
2239     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2240 
2241     x = (Mat_SeqAIJ *)xx->B->data;
2242     y = (Mat_SeqAIJ *)yy->B->data;
2243     if (y->xtoy && y->XtoY != xx->B) {
2244       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2245       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2246     }
2247     if (!y->xtoy) { /* get xtoy */
2248       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2249       y->XtoY = xx->B;
2250       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2251     }
2252     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2253   } else {
2254     Mat B;
2255     PetscInt *nnz_d,*nnz_o;
2256     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2257     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2258     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2259     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2260     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2261     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2262     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2263     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2264     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2265     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2266     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2267     ierr = MatHeaderReplace(Y,B);
2268     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2269     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatConjugate_MPIAIJ"
2278 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281   PetscErrorCode ierr;
2282   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatRealPart_MPIAIJ"
2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2296 {
2297   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2302   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2308 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2309 {
2310   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2315   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #ifdef PETSC_HAVE_PBGL
2320 
2321 #include <boost/parallel/mpi/bsp_process_group.hpp>
2322 #include <boost/graph/distributed/ilu_default_graph.hpp>
2323 #include <boost/graph/distributed/ilu_0_block.hpp>
2324 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2325 #include <boost/graph/distributed/petsc/interface.hpp>
2326 #include <boost/multi_array.hpp>
2327 #include <boost/parallel/distributed_property_map->hpp>
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2331 /*
2332   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2333 */
2334 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2335 {
2336   namespace petsc = boost::distributed::petsc;
2337 
2338   namespace graph_dist = boost::graph::distributed;
2339   using boost::graph::distributed::ilu_default::process_group_type;
2340   using boost::graph::ilu_permuted;
2341 
2342   PetscBool       row_identity, col_identity;
2343   PetscContainer  c;
2344   PetscInt        m, n, M, N;
2345   PetscErrorCode  ierr;
2346 
2347   PetscFunctionBegin;
2348   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2349   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2350   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2351   if (!row_identity || !col_identity) {
2352     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2353   }
2354 
2355   process_group_type pg;
2356   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2357   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2358   lgraph_type&   level_graph = *lgraph_p;
2359   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2360 
2361   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2362   ilu_permuted(level_graph);
2363 
2364   /* put together the new matrix */
2365   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2366   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2367   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2368   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2369   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2370   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2371   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373 
2374   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2375   ierr = PetscContainerSetPointer(c, lgraph_p);
2376   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2377   ierr = PetscContainerDestroy(&c);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2383 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2384 {
2385   PetscFunctionBegin;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatSolve_MPIAIJ"
2391 /*
2392   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2393 */
2394 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2395 {
2396   namespace graph_dist = boost::graph::distributed;
2397 
2398   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2399   lgraph_type*   lgraph_p;
2400   PetscContainer c;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2405   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2406   ierr = VecCopy(b, x);CHKERRQ(ierr);
2407 
2408   PetscScalar* array_x;
2409   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2410   PetscInt sx;
2411   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2412 
2413   PetscScalar* array_b;
2414   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2415   PetscInt sb;
2416   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2417 
2418   lgraph_type&   level_graph = *lgraph_p;
2419   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2420 
2421   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2422   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2423                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2424 
2425   typedef boost::iterator_property_map<array_ref_type::iterator,
2426                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2427   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2428                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2429 
2430   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2431 
2432   PetscFunctionReturn(0);
2433 }
2434 #endif
2435 
2436 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2437   PetscInt       nzlocal,nsends,nrecvs;
2438   PetscMPIInt    *send_rank,*recv_rank;
2439   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2440   PetscScalar    *sbuf_a,**rbuf_a;
2441   PetscErrorCode (*Destroy)(Mat);
2442 } Mat_Redundant;
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2446 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2447 {
2448   PetscErrorCode       ierr;
2449   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2450   PetscInt             i;
2451 
2452   PetscFunctionBegin;
2453   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2454   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2455   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2456   for (i=0; i<redund->nrecvs; i++){
2457     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2458     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2459   }
2460   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2461   ierr = PetscFree(redund);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatDestroy_MatRedundant"
2467 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2468 {
2469   PetscErrorCode  ierr;
2470   PetscContainer  container;
2471   Mat_Redundant   *redund=PETSC_NULL;
2472 
2473   PetscFunctionBegin;
2474   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2475   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2476   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2477   A->ops->destroy = redund->Destroy;
2478   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2479   if (A->ops->destroy) {
2480     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2487 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2488 {
2489   PetscMPIInt    rank,size;
2490   MPI_Comm       comm=((PetscObject)mat)->comm;
2491   PetscErrorCode ierr;
2492   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2493   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2494   PetscInt       *rowrange=mat->rmap->range;
2495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2496   Mat            A=aij->A,B=aij->B,C=*matredundant;
2497   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2498   PetscScalar    *sbuf_a;
2499   PetscInt       nzlocal=a->nz+b->nz;
2500   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2501   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2502   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2503   MatScalar      *aworkA,*aworkB;
2504   PetscScalar    *vals;
2505   PetscMPIInt    tag1,tag2,tag3,imdex;
2506   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2507                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2508   MPI_Status     recv_status,*send_status;
2509   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2510   PetscInt       **rbuf_j=PETSC_NULL;
2511   PetscScalar    **rbuf_a=PETSC_NULL;
2512   Mat_Redundant  *redund=PETSC_NULL;
2513   PetscContainer container;
2514 
2515   PetscFunctionBegin;
2516   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2518 
2519   if (reuse == MAT_REUSE_MATRIX) {
2520     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2521     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2522     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2523     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2524     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2525     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2526     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2527     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2528 
2529     nsends    = redund->nsends;
2530     nrecvs    = redund->nrecvs;
2531     send_rank = redund->send_rank;
2532     recv_rank = redund->recv_rank;
2533     sbuf_nz   = redund->sbuf_nz;
2534     rbuf_nz   = redund->rbuf_nz;
2535     sbuf_j    = redund->sbuf_j;
2536     sbuf_a    = redund->sbuf_a;
2537     rbuf_j    = redund->rbuf_j;
2538     rbuf_a    = redund->rbuf_a;
2539   }
2540 
2541   if (reuse == MAT_INITIAL_MATRIX){
2542     PetscMPIInt  subrank,subsize;
2543     PetscInt     nleftover,np_subcomm;
2544     /* get the destination processors' id send_rank, nsends and nrecvs */
2545     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2546     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2547     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2548     np_subcomm = size/nsubcomm;
2549     nleftover  = size - nsubcomm*np_subcomm;
2550     nsends = 0; nrecvs = 0;
2551     for (i=0; i<size; i++){ /* i=rank*/
2552       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2553         send_rank[nsends] = i; nsends++;
2554         recv_rank[nrecvs++] = i;
2555       }
2556     }
2557     if (rank >= size - nleftover){/* this proc is a leftover processor */
2558       i = size-nleftover-1;
2559       j = 0;
2560       while (j < nsubcomm - nleftover){
2561         send_rank[nsends++] = i;
2562         i--; j++;
2563       }
2564     }
2565 
2566     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2567       for (i=0; i<nleftover; i++){
2568         recv_rank[nrecvs++] = size-nleftover+i;
2569       }
2570     }
2571 
2572     /* allocate sbuf_j, sbuf_a */
2573     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2574     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2575     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2576   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2577 
2578   /* copy mat's local entries into the buffers */
2579   if (reuse == MAT_INITIAL_MATRIX){
2580     rownz_max = 0;
2581     rptr = sbuf_j;
2582     cols = sbuf_j + rend-rstart + 1;
2583     vals = sbuf_a;
2584     rptr[0] = 0;
2585     for (i=0; i<rend-rstart; i++){
2586       row = i + rstart;
2587       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2588       ncols  = nzA + nzB;
2589       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2590       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2591       /* load the column indices for this row into cols */
2592       lwrite = 0;
2593       for (l=0; l<nzB; l++) {
2594         if ((ctmp = bmap[cworkB[l]]) < cstart){
2595           vals[lwrite]   = aworkB[l];
2596           cols[lwrite++] = ctmp;
2597         }
2598       }
2599       for (l=0; l<nzA; l++){
2600         vals[lwrite]   = aworkA[l];
2601         cols[lwrite++] = cstart + cworkA[l];
2602       }
2603       for (l=0; l<nzB; l++) {
2604         if ((ctmp = bmap[cworkB[l]]) >= cend){
2605           vals[lwrite]   = aworkB[l];
2606           cols[lwrite++] = ctmp;
2607         }
2608       }
2609       vals += ncols;
2610       cols += ncols;
2611       rptr[i+1] = rptr[i] + ncols;
2612       if (rownz_max < ncols) rownz_max = ncols;
2613     }
2614     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2615   } else { /* only copy matrix values into sbuf_a */
2616     rptr = sbuf_j;
2617     vals = sbuf_a;
2618     rptr[0] = 0;
2619     for (i=0; i<rend-rstart; i++){
2620       row = i + rstart;
2621       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2622       ncols  = nzA + nzB;
2623       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2624       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2625       lwrite = 0;
2626       for (l=0; l<nzB; l++) {
2627         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2628       }
2629       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2630       for (l=0; l<nzB; l++) {
2631         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2632       }
2633       vals += ncols;
2634       rptr[i+1] = rptr[i] + ncols;
2635     }
2636   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2637 
2638   /* send nzlocal to others, and recv other's nzlocal */
2639   /*--------------------------------------------------*/
2640   if (reuse == MAT_INITIAL_MATRIX){
2641     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2642     s_waits2 = s_waits3 + nsends;
2643     s_waits1 = s_waits2 + nsends;
2644     r_waits1 = s_waits1 + nsends;
2645     r_waits2 = r_waits1 + nrecvs;
2646     r_waits3 = r_waits2 + nrecvs;
2647   } else {
2648     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2649     r_waits3 = s_waits3 + nsends;
2650   }
2651 
2652   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2653   if (reuse == MAT_INITIAL_MATRIX){
2654     /* get new tags to keep the communication clean */
2655     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2656     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2657     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2658 
2659     /* post receives of other's nzlocal */
2660     for (i=0; i<nrecvs; i++){
2661       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2662     }
2663     /* send nzlocal to others */
2664     for (i=0; i<nsends; i++){
2665       sbuf_nz[i] = nzlocal;
2666       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2667     }
2668     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2669     count = nrecvs;
2670     while (count) {
2671       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2672       recv_rank[imdex] = recv_status.MPI_SOURCE;
2673       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2674       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2675 
2676       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2677       rbuf_nz[imdex] += i + 2;
2678       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2679       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2680       count--;
2681     }
2682     /* wait on sends of nzlocal */
2683     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2684     /* send mat->i,j to others, and recv from other's */
2685     /*------------------------------------------------*/
2686     for (i=0; i<nsends; i++){
2687       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2688       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2689     }
2690     /* wait on receives of mat->i,j */
2691     /*------------------------------*/
2692     count = nrecvs;
2693     while (count) {
2694       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2695       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2696       count--;
2697     }
2698     /* wait on sends of mat->i,j */
2699     /*---------------------------*/
2700     if (nsends) {
2701       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2702     }
2703   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2704 
2705   /* post receives, send and receive mat->a */
2706   /*----------------------------------------*/
2707   for (imdex=0; imdex<nrecvs; imdex++) {
2708     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2709   }
2710   for (i=0; i<nsends; i++){
2711     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2712   }
2713   count = nrecvs;
2714   while (count) {
2715     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2716     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2717     count--;
2718   }
2719   if (nsends) {
2720     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2721   }
2722 
2723   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2724 
2725   /* create redundant matrix */
2726   /*-------------------------*/
2727   if (reuse == MAT_INITIAL_MATRIX){
2728     /* compute rownz_max for preallocation */
2729     for (imdex=0; imdex<nrecvs; imdex++){
2730       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2731       rptr = rbuf_j[imdex];
2732       for (i=0; i<j; i++){
2733         ncols = rptr[i+1] - rptr[i];
2734         if (rownz_max < ncols) rownz_max = ncols;
2735       }
2736     }
2737 
2738     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2739     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2740     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2741     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2742     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2743     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2744   } else {
2745     C = *matredundant;
2746   }
2747 
2748   /* insert local matrix entries */
2749   rptr = sbuf_j;
2750   cols = sbuf_j + rend-rstart + 1;
2751   vals = sbuf_a;
2752   for (i=0; i<rend-rstart; i++){
2753     row   = i + rstart;
2754     ncols = rptr[i+1] - rptr[i];
2755     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2756     vals += ncols;
2757     cols += ncols;
2758   }
2759   /* insert received matrix entries */
2760   for (imdex=0; imdex<nrecvs; imdex++){
2761     rstart = rowrange[recv_rank[imdex]];
2762     rend   = rowrange[recv_rank[imdex]+1];
2763     rptr = rbuf_j[imdex];
2764     cols = rbuf_j[imdex] + rend-rstart + 1;
2765     vals = rbuf_a[imdex];
2766     for (i=0; i<rend-rstart; i++){
2767       row   = i + rstart;
2768       ncols = rptr[i+1] - rptr[i];
2769       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2770       vals += ncols;
2771       cols += ncols;
2772     }
2773   }
2774   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2775   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2776   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2777   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2778   if (reuse == MAT_INITIAL_MATRIX) {
2779     PetscContainer container;
2780     *matredundant = C;
2781     /* create a supporting struct and attach it to C for reuse */
2782     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2783     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2784     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2785     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2786     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2787     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2788 
2789     redund->nzlocal = nzlocal;
2790     redund->nsends  = nsends;
2791     redund->nrecvs  = nrecvs;
2792     redund->send_rank = send_rank;
2793     redund->recv_rank = recv_rank;
2794     redund->sbuf_nz = sbuf_nz;
2795     redund->rbuf_nz = rbuf_nz;
2796     redund->sbuf_j  = sbuf_j;
2797     redund->sbuf_a  = sbuf_a;
2798     redund->rbuf_j  = rbuf_j;
2799     redund->rbuf_a  = rbuf_a;
2800 
2801     redund->Destroy = C->ops->destroy;
2802     C->ops->destroy = MatDestroy_MatRedundant;
2803   }
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2809 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2810 {
2811   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2812   PetscErrorCode ierr;
2813   PetscInt       i,*idxb = 0;
2814   PetscScalar    *va,*vb;
2815   Vec            vtmp;
2816 
2817   PetscFunctionBegin;
2818   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2819   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2820   if (idx) {
2821     for (i=0; i<A->rmap->n; i++) {
2822       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2823     }
2824   }
2825 
2826   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2827   if (idx) {
2828     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2829   }
2830   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2831   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2832 
2833   for (i=0; i<A->rmap->n; i++){
2834     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2835       va[i] = vb[i];
2836       if (idx) idx[i] = a->garray[idxb[i]];
2837     }
2838   }
2839 
2840   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2841   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2842   ierr = PetscFree(idxb);CHKERRQ(ierr);
2843   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2844   PetscFunctionReturn(0);
2845 }
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2849 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2850 {
2851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2852   PetscErrorCode ierr;
2853   PetscInt       i,*idxb = 0;
2854   PetscScalar    *va,*vb;
2855   Vec            vtmp;
2856 
2857   PetscFunctionBegin;
2858   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2859   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2860   if (idx) {
2861     for (i=0; i<A->cmap->n; i++) {
2862       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2863     }
2864   }
2865 
2866   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2867   if (idx) {
2868     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2869   }
2870   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2871   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2872 
2873   for (i=0; i<A->rmap->n; i++){
2874     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2875       va[i] = vb[i];
2876       if (idx) idx[i] = a->garray[idxb[i]];
2877     }
2878   }
2879 
2880   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2881   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2882   ierr = PetscFree(idxb);CHKERRQ(ierr);
2883   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2889 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2890 {
2891   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2892   PetscInt       n      = A->rmap->n;
2893   PetscInt       cstart = A->cmap->rstart;
2894   PetscInt      *cmap   = mat->garray;
2895   PetscInt      *diagIdx, *offdiagIdx;
2896   Vec            diagV, offdiagV;
2897   PetscScalar   *a, *diagA, *offdiagA;
2898   PetscInt       r;
2899   PetscErrorCode ierr;
2900 
2901   PetscFunctionBegin;
2902   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2903   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2904   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2905   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2906   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2907   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2908   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2909   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2910   for(r = 0; r < n; ++r) {
2911     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2912       a[r]   = diagA[r];
2913       idx[r] = cstart + diagIdx[r];
2914     } else {
2915       a[r]   = offdiagA[r];
2916       idx[r] = cmap[offdiagIdx[r]];
2917     }
2918   }
2919   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2920   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2921   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2922   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2923   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2924   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2930 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2931 {
2932   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2933   PetscInt       n      = A->rmap->n;
2934   PetscInt       cstart = A->cmap->rstart;
2935   PetscInt      *cmap   = mat->garray;
2936   PetscInt      *diagIdx, *offdiagIdx;
2937   Vec            diagV, offdiagV;
2938   PetscScalar   *a, *diagA, *offdiagA;
2939   PetscInt       r;
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2944   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2945   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2946   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2947   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2948   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2949   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2950   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2951   for(r = 0; r < n; ++r) {
2952     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2953       a[r]   = diagA[r];
2954       idx[r] = cstart + diagIdx[r];
2955     } else {
2956       a[r]   = offdiagA[r];
2957       idx[r] = cmap[offdiagIdx[r]];
2958     }
2959   }
2960   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2961   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2962   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2963   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2964   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2965   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2971 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2972 {
2973   PetscErrorCode ierr;
2974   Mat            *dummy;
2975 
2976   PetscFunctionBegin;
2977   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2978   *newmat = *dummy;
2979   ierr = PetscFree(dummy);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2987 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2988 {
2989   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2990   PetscErrorCode ierr;
2991 
2992   PetscFunctionBegin;
2993   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 
2998 /* -------------------------------------------------------------------*/
2999 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3000        MatGetRow_MPIAIJ,
3001        MatRestoreRow_MPIAIJ,
3002        MatMult_MPIAIJ,
3003 /* 4*/ MatMultAdd_MPIAIJ,
3004        MatMultTranspose_MPIAIJ,
3005        MatMultTransposeAdd_MPIAIJ,
3006 #ifdef PETSC_HAVE_PBGL
3007        MatSolve_MPIAIJ,
3008 #else
3009        0,
3010 #endif
3011        0,
3012        0,
3013 /*10*/ 0,
3014        0,
3015        0,
3016        MatSOR_MPIAIJ,
3017        MatTranspose_MPIAIJ,
3018 /*15*/ MatGetInfo_MPIAIJ,
3019        MatEqual_MPIAIJ,
3020        MatGetDiagonal_MPIAIJ,
3021        MatDiagonalScale_MPIAIJ,
3022        MatNorm_MPIAIJ,
3023 /*20*/ MatAssemblyBegin_MPIAIJ,
3024        MatAssemblyEnd_MPIAIJ,
3025        MatSetOption_MPIAIJ,
3026        MatZeroEntries_MPIAIJ,
3027 /*24*/ MatZeroRows_MPIAIJ,
3028        0,
3029 #ifdef PETSC_HAVE_PBGL
3030        0,
3031 #else
3032        0,
3033 #endif
3034        0,
3035        0,
3036 /*29*/ MatSetUp_MPIAIJ,
3037 #ifdef PETSC_HAVE_PBGL
3038        0,
3039 #else
3040        0,
3041 #endif
3042        0,
3043        0,
3044        0,
3045 /*34*/ MatDuplicate_MPIAIJ,
3046        0,
3047        0,
3048        0,
3049        0,
3050 /*39*/ MatAXPY_MPIAIJ,
3051        MatGetSubMatrices_MPIAIJ,
3052        MatIncreaseOverlap_MPIAIJ,
3053        MatGetValues_MPIAIJ,
3054        MatCopy_MPIAIJ,
3055 /*44*/ MatGetRowMax_MPIAIJ,
3056        MatScale_MPIAIJ,
3057        0,
3058        0,
3059        MatZeroRowsColumns_MPIAIJ,
3060 /*49*/ 0,
3061        0,
3062        0,
3063        0,
3064        0,
3065 /*54*/ MatFDColoringCreate_MPIAIJ,
3066        0,
3067        MatSetUnfactored_MPIAIJ,
3068        0, /* MatPermute_MPIAIJ, impl currently broken */
3069        0,
3070 /*59*/ MatGetSubMatrix_MPIAIJ,
3071        MatDestroy_MPIAIJ,
3072        MatView_MPIAIJ,
3073        0,
3074        0,
3075 /*64*/ 0,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3081        MatGetRowMinAbs_MPIAIJ,
3082        0,
3083        MatSetColoring_MPIAIJ,
3084 #if defined(PETSC_HAVE_ADIC)
3085        MatSetValuesAdic_MPIAIJ,
3086 #else
3087        0,
3088 #endif
3089        MatSetValuesAdifor_MPIAIJ,
3090 /*75*/ MatFDColoringApply_AIJ,
3091        0,
3092        0,
3093        0,
3094        0,
3095 /*80*/ 0,
3096        0,
3097        0,
3098 /*83*/ MatLoad_MPIAIJ,
3099        0,
3100        0,
3101        0,
3102        0,
3103        0,
3104 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3105        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3106        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3107        MatPtAP_Basic,
3108        MatPtAPSymbolic_MPIAIJ,
3109 /*94*/ MatPtAPNumeric_MPIAIJ,
3110        0,
3111        0,
3112        0,
3113        0,
3114 /*99*/ 0,
3115        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3116        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3117        MatConjugate_MPIAIJ,
3118        0,
3119 /*104*/MatSetValuesRow_MPIAIJ,
3120        MatRealPart_MPIAIJ,
3121        MatImaginaryPart_MPIAIJ,
3122        0,
3123        0,
3124 /*109*/0,
3125        MatGetRedundantMatrix_MPIAIJ,
3126        MatGetRowMin_MPIAIJ,
3127        0,
3128        0,
3129 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3130        0,
3131        0,
3132        0,
3133        0,
3134 /*119*/0,
3135        0,
3136        0,
3137        0,
3138        MatGetMultiProcBlock_MPIAIJ,
3139 /*124*/MatFindNonZeroRows_MPIAIJ,
3140        MatGetColumnNorms_MPIAIJ,
3141        MatInvertBlockDiagonal_MPIAIJ,
3142        0,
3143        MatGetSubMatricesParallel_MPIAIJ,
3144 /*129*/0,
3145        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3146        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3147        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3148        0,
3149 /*134*/0,
3150        0,
3151        0,
3152        0,
3153        0
3154 };
3155 
3156 /* ----------------------------------------------------------------------------------------*/
3157 
3158 EXTERN_C_BEGIN
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3161 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3162 {
3163   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3168   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3169   PetscFunctionReturn(0);
3170 }
3171 EXTERN_C_END
3172 
3173 EXTERN_C_BEGIN
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3176 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3177 {
3178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3183   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3184   PetscFunctionReturn(0);
3185 }
3186 EXTERN_C_END
3187 
3188 EXTERN_C_BEGIN
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3191 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3192 {
3193   Mat_MPIAIJ     *b;
3194   PetscErrorCode ierr;
3195   PetscInt       i;
3196   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3197 
3198   PetscFunctionBegin;
3199   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3200   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3201   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3202   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3203   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3204   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3205 
3206   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3207   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3208   if (d_nnz) {
3209     for (i=0; i<B->rmap->n; i++) {
3210       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3211     }
3212   }
3213   if (o_nnz) {
3214     for (i=0; i<B->rmap->n; i++) {
3215       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3216     }
3217   }
3218   b = (Mat_MPIAIJ*)B->data;
3219 
3220   if (!B->preallocated) {
3221     /* Explicitly create 2 MATSEQAIJ matrices. */
3222     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3223     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3224     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3225     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3226     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3227     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3228     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3229     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3230     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3231     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3232   }
3233 
3234   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3235   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3236   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3237   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3238   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3239   B->preallocated = PETSC_TRUE;
3240   PetscFunctionReturn(0);
3241 }
3242 EXTERN_C_END
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3246 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3247 {
3248   Mat            mat;
3249   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3250   PetscErrorCode ierr;
3251 
3252   PetscFunctionBegin;
3253   *newmat       = 0;
3254   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3255   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3256   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3257   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3258   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3259   a    = (Mat_MPIAIJ*)mat->data;
3260 
3261   mat->factortype    = matin->factortype;
3262   mat->rmap->bs      = matin->rmap->bs;
3263   mat->cmap->bs      = matin->cmap->bs;
3264   mat->assembled    = PETSC_TRUE;
3265   mat->insertmode   = NOT_SET_VALUES;
3266   mat->preallocated = PETSC_TRUE;
3267 
3268   a->size           = oldmat->size;
3269   a->rank           = oldmat->rank;
3270   a->donotstash     = oldmat->donotstash;
3271   a->roworiented    = oldmat->roworiented;
3272   a->rowindices     = 0;
3273   a->rowvalues      = 0;
3274   a->getrowactive   = PETSC_FALSE;
3275 
3276   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3277   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3278 
3279   if (oldmat->colmap) {
3280 #if defined (PETSC_USE_CTABLE)
3281     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3282 #else
3283     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3284     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3285     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3286 #endif
3287   } else a->colmap = 0;
3288   if (oldmat->garray) {
3289     PetscInt len;
3290     len  = oldmat->B->cmap->n;
3291     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3292     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3293     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3294   } else a->garray = 0;
3295 
3296   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3297   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3298   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3299   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3300   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3301   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3302   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3303   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3304   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3305   *newmat = mat;
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 
3310 
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatLoad_MPIAIJ"
3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3314 {
3315   PetscScalar    *vals,*svals;
3316   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3317   PetscErrorCode ierr;
3318   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3319   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3320   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3321   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3322   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3323   int            fd;
3324   PetscInt       bs = 1;
3325 
3326   PetscFunctionBegin;
3327   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3328   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3329   if (!rank) {
3330     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3331     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3332     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3333   }
3334 
3335   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3336   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3337   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3338 
3339   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3340 
3341   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3342   M = header[1]; N = header[2];
3343   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3344   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3345   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3346 
3347   /* If global sizes are set, check if they are consistent with that given in the file */
3348   if (sizesset) {
3349     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3350   }
3351   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3352   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3353 
3354   /* determine ownership of all (block) rows */
3355   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3356   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3357   else m = newMat->rmap->n; /* Set by user */
3358 
3359   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3360   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3361 
3362   /* First process needs enough room for process with most rows */
3363   if (!rank) {
3364     mmax       = rowners[1];
3365     for (i=2; i<size; i++) {
3366       mmax = PetscMax(mmax,rowners[i]);
3367     }
3368   } else mmax = m;
3369 
3370   rowners[0] = 0;
3371   for (i=2; i<=size; i++) {
3372     rowners[i] += rowners[i-1];
3373   }
3374   rstart = rowners[rank];
3375   rend   = rowners[rank+1];
3376 
3377   /* distribute row lengths to all processors */
3378   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3379   if (!rank) {
3380     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3381     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3382     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3383     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3384     for (j=0; j<m; j++) {
3385       procsnz[0] += ourlens[j];
3386     }
3387     for (i=1; i<size; i++) {
3388       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3389       /* calculate the number of nonzeros on each processor */
3390       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3391         procsnz[i] += rowlengths[j];
3392       }
3393       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3394     }
3395     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3396   } else {
3397     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3398   }
3399 
3400   if (!rank) {
3401     /* determine max buffer needed and allocate it */
3402     maxnz = 0;
3403     for (i=0; i<size; i++) {
3404       maxnz = PetscMax(maxnz,procsnz[i]);
3405     }
3406     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3407 
3408     /* read in my part of the matrix column indices  */
3409     nz   = procsnz[0];
3410     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3411     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3412 
3413     /* read in every one elses and ship off */
3414     for (i=1; i<size; i++) {
3415       nz     = procsnz[i];
3416       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3417       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3418     }
3419     ierr = PetscFree(cols);CHKERRQ(ierr);
3420   } else {
3421     /* determine buffer space needed for message */
3422     nz = 0;
3423     for (i=0; i<m; i++) {
3424       nz += ourlens[i];
3425     }
3426     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3427 
3428     /* receive message of column indices*/
3429     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3430   }
3431 
3432   /* determine column ownership if matrix is not square */
3433   if (N != M) {
3434     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3435     else n = newMat->cmap->n;
3436     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3437     cstart = cend - n;
3438   } else {
3439     cstart = rstart;
3440     cend   = rend;
3441     n      = cend - cstart;
3442   }
3443 
3444   /* loop over local rows, determining number of off diagonal entries */
3445   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3446   jj = 0;
3447   for (i=0; i<m; i++) {
3448     for (j=0; j<ourlens[i]; j++) {
3449       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3450       jj++;
3451     }
3452   }
3453 
3454   for (i=0; i<m; i++) {
3455     ourlens[i] -= offlens[i];
3456   }
3457   if (!sizesset) {
3458     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3459   }
3460 
3461   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3462 
3463   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3464 
3465   for (i=0; i<m; i++) {
3466     ourlens[i] += offlens[i];
3467   }
3468 
3469   if (!rank) {
3470     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3471 
3472     /* read in my part of the matrix numerical values  */
3473     nz   = procsnz[0];
3474     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3475 
3476     /* insert into matrix */
3477     jj      = rstart;
3478     smycols = mycols;
3479     svals   = vals;
3480     for (i=0; i<m; i++) {
3481       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3482       smycols += ourlens[i];
3483       svals   += ourlens[i];
3484       jj++;
3485     }
3486 
3487     /* read in other processors and ship out */
3488     for (i=1; i<size; i++) {
3489       nz     = procsnz[i];
3490       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3492     }
3493     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3494   } else {
3495     /* receive numeric values */
3496     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3497 
3498     /* receive message of values*/
3499     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3500 
3501     /* insert into matrix */
3502     jj      = rstart;
3503     smycols = mycols;
3504     svals   = vals;
3505     for (i=0; i<m; i++) {
3506       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3507       smycols += ourlens[i];
3508       svals   += ourlens[i];
3509       jj++;
3510     }
3511   }
3512   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3513   ierr = PetscFree(vals);CHKERRQ(ierr);
3514   ierr = PetscFree(mycols);CHKERRQ(ierr);
3515   ierr = PetscFree(rowners);CHKERRQ(ierr);
3516   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3517   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3518 
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3524 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3525 {
3526   PetscErrorCode ierr;
3527   IS             iscol_local;
3528   PetscInt       csize;
3529 
3530   PetscFunctionBegin;
3531   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3532   if (call == MAT_REUSE_MATRIX) {
3533     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3534     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3535   } else {
3536     PetscInt cbs;
3537     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3538     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3539     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3540   }
3541   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3542   if (call == MAT_INITIAL_MATRIX) {
3543     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3544     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3545   }
3546   PetscFunctionReturn(0);
3547 }
3548 
3549 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3552 /*
3553     Not great since it makes two copies of the submatrix, first an SeqAIJ
3554   in local and then by concatenating the local matrices the end result.
3555   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3556 
3557   Note: This requires a sequential iscol with all indices.
3558 */
3559 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3560 {
3561   PetscErrorCode ierr;
3562   PetscMPIInt    rank,size;
3563   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3564   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3565   PetscBool      allcolumns, colflag;
3566   Mat            M,Mreuse;
3567   MatScalar      *vwork,*aa;
3568   MPI_Comm       comm = ((PetscObject)mat)->comm;
3569   Mat_SeqAIJ     *aij;
3570 
3571 
3572   PetscFunctionBegin;
3573   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3574   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3575 
3576   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3577   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3578   if (colflag && ncol == mat->cmap->N){
3579     allcolumns = PETSC_TRUE;
3580   } else {
3581     allcolumns = PETSC_FALSE;
3582   }
3583   if (call ==  MAT_REUSE_MATRIX) {
3584     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3585     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3586     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3587   } else {
3588     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3589   }
3590 
3591   /*
3592       m - number of local rows
3593       n - number of columns (same on all processors)
3594       rstart - first row in new global matrix generated
3595   */
3596   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3597   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3598   if (call == MAT_INITIAL_MATRIX) {
3599     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3600     ii  = aij->i;
3601     jj  = aij->j;
3602 
3603     /*
3604         Determine the number of non-zeros in the diagonal and off-diagonal
3605         portions of the matrix in order to do correct preallocation
3606     */
3607 
3608     /* first get start and end of "diagonal" columns */
3609     if (csize == PETSC_DECIDE) {
3610       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3611       if (mglobal == n) { /* square matrix */
3612 	nlocal = m;
3613       } else {
3614         nlocal = n/size + ((n % size) > rank);
3615       }
3616     } else {
3617       nlocal = csize;
3618     }
3619     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3620     rstart = rend - nlocal;
3621     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3622 
3623     /* next, compute all the lengths */
3624     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3625     olens = dlens + m;
3626     for (i=0; i<m; i++) {
3627       jend = ii[i+1] - ii[i];
3628       olen = 0;
3629       dlen = 0;
3630       for (j=0; j<jend; j++) {
3631         if (*jj < rstart || *jj >= rend) olen++;
3632         else dlen++;
3633         jj++;
3634       }
3635       olens[i] = olen;
3636       dlens[i] = dlen;
3637     }
3638     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3639     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3640     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3641     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3642     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3643     ierr = PetscFree(dlens);CHKERRQ(ierr);
3644   } else {
3645     PetscInt ml,nl;
3646 
3647     M = *newmat;
3648     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3649     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3650     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3651     /*
3652          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3653        rather than the slower MatSetValues().
3654     */
3655     M->was_assembled = PETSC_TRUE;
3656     M->assembled     = PETSC_FALSE;
3657   }
3658   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3659   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3660   ii  = aij->i;
3661   jj  = aij->j;
3662   aa  = aij->a;
3663   for (i=0; i<m; i++) {
3664     row   = rstart + i;
3665     nz    = ii[i+1] - ii[i];
3666     cwork = jj;     jj += nz;
3667     vwork = aa;     aa += nz;
3668     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3669   }
3670 
3671   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673   *newmat = M;
3674 
3675   /* save submatrix used in processor for next request */
3676   if (call ==  MAT_INITIAL_MATRIX) {
3677     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3678     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3679   }
3680 
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 EXTERN_C_BEGIN
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3687 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3688 {
3689   PetscInt       m,cstart, cend,j,nnz,i,d;
3690   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3691   const PetscInt *JJ;
3692   PetscScalar    *values;
3693   PetscErrorCode ierr;
3694 
3695   PetscFunctionBegin;
3696   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3697 
3698   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3699   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3700   m      = B->rmap->n;
3701   cstart = B->cmap->rstart;
3702   cend   = B->cmap->rend;
3703   rstart = B->rmap->rstart;
3704 
3705   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3706 
3707 #if defined(PETSC_USE_DEBUGGING)
3708   for (i=0; i<m; i++) {
3709     nnz     = Ii[i+1]- Ii[i];
3710     JJ      = J + Ii[i];
3711     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3712     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3713     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3714   }
3715 #endif
3716 
3717   for (i=0; i<m; i++) {
3718     nnz     = Ii[i+1]- Ii[i];
3719     JJ      = J + Ii[i];
3720     nnz_max = PetscMax(nnz_max,nnz);
3721     d       = 0;
3722     for (j=0; j<nnz; j++) {
3723       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3724     }
3725     d_nnz[i] = d;
3726     o_nnz[i] = nnz - d;
3727   }
3728   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3729   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3730 
3731   if (v) values = (PetscScalar*)v;
3732   else {
3733     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3734     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3735   }
3736 
3737   for (i=0; i<m; i++) {
3738     ii   = i + rstart;
3739     nnz  = Ii[i+1]- Ii[i];
3740     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3741   }
3742   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744 
3745   if (!v) {
3746     ierr = PetscFree(values);CHKERRQ(ierr);
3747   }
3748   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3749   PetscFunctionReturn(0);
3750 }
3751 EXTERN_C_END
3752 
3753 #undef __FUNCT__
3754 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3755 /*@
3756    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3757    (the default parallel PETSc format).
3758 
3759    Collective on MPI_Comm
3760 
3761    Input Parameters:
3762 +  B - the matrix
3763 .  i - the indices into j for the start of each local row (starts with zero)
3764 .  j - the column indices for each local row (starts with zero)
3765 -  v - optional values in the matrix
3766 
3767    Level: developer
3768 
3769    Notes:
3770        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3771      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3772      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3773 
3774        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3775 
3776        The format which is used for the sparse matrix input, is equivalent to a
3777     row-major ordering.. i.e for the following matrix, the input data expected is
3778     as shown:
3779 
3780         1 0 0
3781         2 0 3     P0
3782        -------
3783         4 5 6     P1
3784 
3785      Process0 [P0]: rows_owned=[0,1]
3786         i =  {0,1,3}  [size = nrow+1  = 2+1]
3787         j =  {0,0,2}  [size = nz = 6]
3788         v =  {1,2,3}  [size = nz = 6]
3789 
3790      Process1 [P1]: rows_owned=[2]
3791         i =  {0,3}    [size = nrow+1  = 1+1]
3792         j =  {0,1,2}  [size = nz = 6]
3793         v =  {4,5,6}  [size = nz = 6]
3794 
3795 .keywords: matrix, aij, compressed row, sparse, parallel
3796 
3797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3798           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3799 @*/
3800 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3801 {
3802   PetscErrorCode ierr;
3803 
3804   PetscFunctionBegin;
3805   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3806   PetscFunctionReturn(0);
3807 }
3808 
3809 #undef __FUNCT__
3810 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3811 /*@C
3812    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3813    (the default parallel PETSc format).  For good matrix assembly performance
3814    the user should preallocate the matrix storage by setting the parameters
3815    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3816    performance can be increased by more than a factor of 50.
3817 
3818    Collective on MPI_Comm
3819 
3820    Input Parameters:
3821 +  A - the matrix
3822 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3823            (same value is used for all local rows)
3824 .  d_nnz - array containing the number of nonzeros in the various rows of the
3825            DIAGONAL portion of the local submatrix (possibly different for each row)
3826            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3827            The size of this array is equal to the number of local rows, i.e 'm'.
3828            For matrices that will be factored, you must leave room for (and set)
3829            the diagonal entry even if it is zero.
3830 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3831            submatrix (same value is used for all local rows).
3832 -  o_nnz - array containing the number of nonzeros in the various rows of the
3833            OFF-DIAGONAL portion of the local submatrix (possibly different for
3834            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3835            structure. The size of this array is equal to the number
3836            of local rows, i.e 'm'.
3837 
3838    If the *_nnz parameter is given then the *_nz parameter is ignored
3839 
3840    The AIJ format (also called the Yale sparse matrix format or
3841    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3842    storage.  The stored row and column indices begin with zero.
3843    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3844 
3845    The parallel matrix is partitioned such that the first m0 rows belong to
3846    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3847    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3848 
3849    The DIAGONAL portion of the local submatrix of a processor can be defined
3850    as the submatrix which is obtained by extraction the part corresponding to
3851    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3852    first row that belongs to the processor, r2 is the last row belonging to
3853    the this processor, and c1-c2 is range of indices of the local part of a
3854    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3855    common case of a square matrix, the row and column ranges are the same and
3856    the DIAGONAL part is also square. The remaining portion of the local
3857    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3858 
3859    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3860 
3861    You can call MatGetInfo() to get information on how effective the preallocation was;
3862    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3863    You can also run with the option -info and look for messages with the string
3864    malloc in them to see if additional memory allocation was needed.
3865 
3866    Example usage:
3867 
3868    Consider the following 8x8 matrix with 34 non-zero values, that is
3869    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3870    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3871    as follows:
3872 
3873 .vb
3874             1  2  0  |  0  3  0  |  0  4
3875     Proc0   0  5  6  |  7  0  0  |  8  0
3876             9  0 10  | 11  0  0  | 12  0
3877     -------------------------------------
3878            13  0 14  | 15 16 17  |  0  0
3879     Proc1   0 18  0  | 19 20 21  |  0  0
3880             0  0  0  | 22 23  0  | 24  0
3881     -------------------------------------
3882     Proc2  25 26 27  |  0  0 28  | 29  0
3883            30  0  0  | 31 32 33  |  0 34
3884 .ve
3885 
3886    This can be represented as a collection of submatrices as:
3887 
3888 .vb
3889       A B C
3890       D E F
3891       G H I
3892 .ve
3893 
3894    Where the submatrices A,B,C are owned by proc0, D,E,F are
3895    owned by proc1, G,H,I are owned by proc2.
3896 
3897    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3899    The 'M','N' parameters are 8,8, and have the same values on all procs.
3900 
3901    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3902    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3903    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3904    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3905    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3906    matrix, ans [DF] as another SeqAIJ matrix.
3907 
3908    When d_nz, o_nz parameters are specified, d_nz storage elements are
3909    allocated for every row of the local diagonal submatrix, and o_nz
3910    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3911    One way to choose d_nz and o_nz is to use the max nonzerors per local
3912    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3913    In this case, the values of d_nz,o_nz are:
3914 .vb
3915      proc0 : dnz = 2, o_nz = 2
3916      proc1 : dnz = 3, o_nz = 2
3917      proc2 : dnz = 1, o_nz = 4
3918 .ve
3919    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3920    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3921    for proc3. i.e we are using 12+15+10=37 storage locations to store
3922    34 values.
3923 
3924    When d_nnz, o_nnz parameters are specified, the storage is specified
3925    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3926    In the above case the values for d_nnz,o_nnz are:
3927 .vb
3928      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3929      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3930      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3931 .ve
3932    Here the space allocated is sum of all the above values i.e 34, and
3933    hence pre-allocation is perfect.
3934 
3935    Level: intermediate
3936 
3937 .keywords: matrix, aij, compressed row, sparse, parallel
3938 
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3940           MPIAIJ, MatGetInfo()
3941 @*/
3942 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3943 {
3944   PetscErrorCode ierr;
3945 
3946   PetscFunctionBegin;
3947   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3948   PetscValidType(B,1);
3949   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 #undef __FUNCT__
3954 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3955 /*@
3956      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3957          CSR format the local rows.
3958 
3959    Collective on MPI_Comm
3960 
3961    Input Parameters:
3962 +  comm - MPI communicator
3963 .  m - number of local rows (Cannot be PETSC_DECIDE)
3964 .  n - This value should be the same as the local size used in creating the
3965        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3966        calculated if N is given) For square matrices n is almost always m.
3967 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3968 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3969 .   i - row indices
3970 .   j - column indices
3971 -   a - matrix values
3972 
3973    Output Parameter:
3974 .   mat - the matrix
3975 
3976    Level: intermediate
3977 
3978    Notes:
3979        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3980      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3981      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3982 
3983        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3984 
3985        The format which is used for the sparse matrix input, is equivalent to a
3986     row-major ordering.. i.e for the following matrix, the input data expected is
3987     as shown:
3988 
3989         1 0 0
3990         2 0 3     P0
3991        -------
3992         4 5 6     P1
3993 
3994      Process0 [P0]: rows_owned=[0,1]
3995         i =  {0,1,3}  [size = nrow+1  = 2+1]
3996         j =  {0,0,2}  [size = nz = 6]
3997         v =  {1,2,3}  [size = nz = 6]
3998 
3999      Process1 [P1]: rows_owned=[2]
4000         i =  {0,3}    [size = nrow+1  = 1+1]
4001         j =  {0,1,2}  [size = nz = 6]
4002         v =  {4,5,6}  [size = nz = 6]
4003 
4004 .keywords: matrix, aij, compressed row, sparse, parallel
4005 
4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4007           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4008 @*/
4009 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4010 {
4011   PetscErrorCode ierr;
4012 
4013   PetscFunctionBegin;
4014   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4015   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4016   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4017   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4018   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4019   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4020   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4021   PetscFunctionReturn(0);
4022 }
4023 
4024 #undef __FUNCT__
4025 #define __FUNCT__ "MatCreateAIJ"
4026 /*@C
4027    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4028    (the default parallel PETSc format).  For good matrix assembly performance
4029    the user should preallocate the matrix storage by setting the parameters
4030    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4031    performance can be increased by more than a factor of 50.
4032 
4033    Collective on MPI_Comm
4034 
4035    Input Parameters:
4036 +  comm - MPI communicator
4037 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4038            This value should be the same as the local size used in creating the
4039            y vector for the matrix-vector product y = Ax.
4040 .  n - This value should be the same as the local size used in creating the
4041        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4042        calculated if N is given) For square matrices n is almost always m.
4043 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4044 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4045 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4046            (same value is used for all local rows)
4047 .  d_nnz - array containing the number of nonzeros in the various rows of the
4048            DIAGONAL portion of the local submatrix (possibly different for each row)
4049            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4050            The size of this array is equal to the number of local rows, i.e 'm'.
4051 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4052            submatrix (same value is used for all local rows).
4053 -  o_nnz - array containing the number of nonzeros in the various rows of the
4054            OFF-DIAGONAL portion of the local submatrix (possibly different for
4055            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4056            structure. The size of this array is equal to the number
4057            of local rows, i.e 'm'.
4058 
4059    Output Parameter:
4060 .  A - the matrix
4061 
4062    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4063    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4064    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4065 
4066    Notes:
4067    If the *_nnz parameter is given then the *_nz parameter is ignored
4068 
4069    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4070    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4071    storage requirements for this matrix.
4072 
4073    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4074    processor than it must be used on all processors that share the object for
4075    that argument.
4076 
4077    The user MUST specify either the local or global matrix dimensions
4078    (possibly both).
4079 
4080    The parallel matrix is partitioned across processors such that the
4081    first m0 rows belong to process 0, the next m1 rows belong to
4082    process 1, the next m2 rows belong to process 2 etc.. where
4083    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4084    values corresponding to [m x N] submatrix.
4085 
4086    The columns are logically partitioned with the n0 columns belonging
4087    to 0th partition, the next n1 columns belonging to the next
4088    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4089 
4090    The DIAGONAL portion of the local submatrix on any given processor
4091    is the submatrix corresponding to the rows and columns m,n
4092    corresponding to the given processor. i.e diagonal matrix on
4093    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4094    etc. The remaining portion of the local submatrix [m x (N-n)]
4095    constitute the OFF-DIAGONAL portion. The example below better
4096    illustrates this concept.
4097 
4098    For a square global matrix we define each processor's diagonal portion
4099    to be its local rows and the corresponding columns (a square submatrix);
4100    each processor's off-diagonal portion encompasses the remainder of the
4101    local matrix (a rectangular submatrix).
4102 
4103    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4104 
4105    When calling this routine with a single process communicator, a matrix of
4106    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4107    type of communicator, use the construction mechanism:
4108      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4109 
4110    By default, this format uses inodes (identical nodes) when possible.
4111    We search for consecutive rows with the same nonzero structure, thereby
4112    reusing matrix information to achieve increased efficiency.
4113 
4114    Options Database Keys:
4115 +  -mat_no_inode  - Do not use inodes
4116 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4117 -  -mat_aij_oneindex - Internally use indexing starting at 1
4118         rather than 0.  Note that when calling MatSetValues(),
4119         the user still MUST index entries starting at 0!
4120 
4121 
4122    Example usage:
4123 
4124    Consider the following 8x8 matrix with 34 non-zero values, that is
4125    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4126    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4127    as follows:
4128 
4129 .vb
4130             1  2  0  |  0  3  0  |  0  4
4131     Proc0   0  5  6  |  7  0  0  |  8  0
4132             9  0 10  | 11  0  0  | 12  0
4133     -------------------------------------
4134            13  0 14  | 15 16 17  |  0  0
4135     Proc1   0 18  0  | 19 20 21  |  0  0
4136             0  0  0  | 22 23  0  | 24  0
4137     -------------------------------------
4138     Proc2  25 26 27  |  0  0 28  | 29  0
4139            30  0  0  | 31 32 33  |  0 34
4140 .ve
4141 
4142    This can be represented as a collection of submatrices as:
4143 
4144 .vb
4145       A B C
4146       D E F
4147       G H I
4148 .ve
4149 
4150    Where the submatrices A,B,C are owned by proc0, D,E,F are
4151    owned by proc1, G,H,I are owned by proc2.
4152 
4153    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4155    The 'M','N' parameters are 8,8, and have the same values on all procs.
4156 
4157    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4158    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4159    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4160    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4161    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4162    matrix, ans [DF] as another SeqAIJ matrix.
4163 
4164    When d_nz, o_nz parameters are specified, d_nz storage elements are
4165    allocated for every row of the local diagonal submatrix, and o_nz
4166    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4167    One way to choose d_nz and o_nz is to use the max nonzerors per local
4168    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4169    In this case, the values of d_nz,o_nz are:
4170 .vb
4171      proc0 : dnz = 2, o_nz = 2
4172      proc1 : dnz = 3, o_nz = 2
4173      proc2 : dnz = 1, o_nz = 4
4174 .ve
4175    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4176    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4177    for proc3. i.e we are using 12+15+10=37 storage locations to store
4178    34 values.
4179 
4180    When d_nnz, o_nnz parameters are specified, the storage is specified
4181    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4182    In the above case the values for d_nnz,o_nnz are:
4183 .vb
4184      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4185      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4186      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4187 .ve
4188    Here the space allocated is sum of all the above values i.e 34, and
4189    hence pre-allocation is perfect.
4190 
4191    Level: intermediate
4192 
4193 .keywords: matrix, aij, compressed row, sparse, parallel
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MPIAIJ, MatCreateMPIAIJWithArrays()
4197 @*/
4198 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4199 {
4200   PetscErrorCode ierr;
4201   PetscMPIInt    size;
4202 
4203   PetscFunctionBegin;
4204   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4205   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4206   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4207   if (size > 1) {
4208     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4209     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4210   } else {
4211     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4212     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4213   }
4214   PetscFunctionReturn(0);
4215 }
4216 
4217 #undef __FUNCT__
4218 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4219 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4220 {
4221   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4222 
4223   PetscFunctionBegin;
4224   *Ad     = a->A;
4225   *Ao     = a->B;
4226   *colmap = a->garray;
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #undef __FUNCT__
4231 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4232 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4233 {
4234   PetscErrorCode ierr;
4235   PetscInt       i;
4236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4237 
4238   PetscFunctionBegin;
4239   if (coloring->ctype == IS_COLORING_GLOBAL) {
4240     ISColoringValue *allcolors,*colors;
4241     ISColoring      ocoloring;
4242 
4243     /* set coloring for diagonal portion */
4244     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4245 
4246     /* set coloring for off-diagonal portion */
4247     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4248     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4249     for (i=0; i<a->B->cmap->n; i++) {
4250       colors[i] = allcolors[a->garray[i]];
4251     }
4252     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4253     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4254     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4255     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4256   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4257     ISColoringValue *colors;
4258     PetscInt        *larray;
4259     ISColoring      ocoloring;
4260 
4261     /* set coloring for diagonal portion */
4262     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4263     for (i=0; i<a->A->cmap->n; i++) {
4264       larray[i] = i + A->cmap->rstart;
4265     }
4266     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4267     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4268     for (i=0; i<a->A->cmap->n; i++) {
4269       colors[i] = coloring->colors[larray[i]];
4270     }
4271     ierr = PetscFree(larray);CHKERRQ(ierr);
4272     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4273     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4274     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4275 
4276     /* set coloring for off-diagonal portion */
4277     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4278     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4279     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4280     for (i=0; i<a->B->cmap->n; i++) {
4281       colors[i] = coloring->colors[larray[i]];
4282     }
4283     ierr = PetscFree(larray);CHKERRQ(ierr);
4284     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4285     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4286     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4287   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4288 
4289   PetscFunctionReturn(0);
4290 }
4291 
4292 #if defined(PETSC_HAVE_ADIC)
4293 #undef __FUNCT__
4294 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4295 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4296 {
4297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4298   PetscErrorCode ierr;
4299 
4300   PetscFunctionBegin;
4301   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4302   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4303   PetscFunctionReturn(0);
4304 }
4305 #endif
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4310 {
4311   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4312   PetscErrorCode ierr;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4316   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4322 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4323 {
4324   PetscErrorCode ierr;
4325   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4326   PetscInt       *indx;
4327 
4328   PetscFunctionBegin;
4329   /* This routine will ONLY return MPIAIJ type matrix */
4330   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4331   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4332   if (n == PETSC_DECIDE){
4333     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4334   }
4335   /* Check sum(n) = N */
4336   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4338 
4339   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4340   rstart -= m;
4341 
4342   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4343   for (i=0;i<m;i++) {
4344     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4345     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4346     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4347   }
4348 
4349   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4350   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4351   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4352   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4353   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4354   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4360 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4361 {
4362   PetscErrorCode ierr;
4363   PetscInt       m,N,i,rstart,nnz,Ii;
4364   PetscInt       *indx;
4365   PetscScalar    *values;
4366 
4367   PetscFunctionBegin;
4368   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4369   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4370   for (i=0;i<m;i++) {
4371     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372     Ii    = i + rstart;
4373     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4374     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4375   }
4376   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4383 /*@
4384       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4385                  matrices from each processor
4386 
4387     Collective on MPI_Comm
4388 
4389    Input Parameters:
4390 +    comm - the communicators the parallel matrix will live on
4391 .    inmat - the input sequential matrices
4392 .    n - number of local columns (or PETSC_DECIDE)
4393 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4394 
4395    Output Parameter:
4396 .    outmat - the parallel matrix generated
4397 
4398     Level: advanced
4399 
4400    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4401 
4402 @*/
4403 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4404 {
4405   PetscErrorCode ierr;
4406 
4407   PetscFunctionBegin;
4408   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4409   if (scall == MAT_INITIAL_MATRIX){
4410     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4411   }
4412   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4413   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatFileSplit"
4419 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4420 {
4421   PetscErrorCode    ierr;
4422   PetscMPIInt       rank;
4423   PetscInt          m,N,i,rstart,nnz;
4424   size_t            len;
4425   const PetscInt    *indx;
4426   PetscViewer       out;
4427   char              *name;
4428   Mat               B;
4429   const PetscScalar *values;
4430 
4431   PetscFunctionBegin;
4432   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4433   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4434   /* Should this be the type of the diagonal block of A? */
4435   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4436   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4437   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4438   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4439   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4440   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4441   for (i=0;i<m;i++) {
4442     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4443     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4444     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4445   }
4446   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4447   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448 
4449   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4450   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4451   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4452   sprintf(name,"%s.%d",outfile,rank);
4453   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4454   ierr = PetscFree(name);
4455   ierr = MatView(B,out);CHKERRQ(ierr);
4456   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4457   ierr = MatDestroy(&B);CHKERRQ(ierr);
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4462 #undef __FUNCT__
4463 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4464 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4465 {
4466   PetscErrorCode       ierr;
4467   Mat_Merge_SeqsToMPI  *merge;
4468   PetscContainer       container;
4469 
4470   PetscFunctionBegin;
4471   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4472   if (container) {
4473     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4486     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4487     ierr = PetscFree(merge);CHKERRQ(ierr);
4488     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4489   }
4490   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4491   PetscFunctionReturn(0);
4492 }
4493 
4494 #include <../src/mat/utils/freespace.h>
4495 #include <petscbt.h>
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4499 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4500 {
4501   PetscErrorCode       ierr;
4502   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4503   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4504   PetscMPIInt          size,rank,taga,*len_s;
4505   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4506   PetscInt             proc,m;
4507   PetscInt             **buf_ri,**buf_rj;
4508   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4509   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4510   MPI_Request          *s_waits,*r_waits;
4511   MPI_Status           *status;
4512   MatScalar            *aa=a->a;
4513   MatScalar            **abuf_r,*ba_i;
4514   Mat_Merge_SeqsToMPI  *merge;
4515   PetscContainer       container;
4516 
4517   PetscFunctionBegin;
4518   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4519 
4520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4521   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4522 
4523   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4524   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4525 
4526   bi     = merge->bi;
4527   bj     = merge->bj;
4528   buf_ri = merge->buf_ri;
4529   buf_rj = merge->buf_rj;
4530 
4531   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4532   owners = merge->rowmap->range;
4533   len_s  = merge->len_s;
4534 
4535   /* send and recv matrix values */
4536   /*-----------------------------*/
4537   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4538   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4539 
4540   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4541   for (proc=0,k=0; proc<size; proc++){
4542     if (!len_s[proc]) continue;
4543     i = owners[proc];
4544     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4545     k++;
4546   }
4547 
4548   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4549   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4550   ierr = PetscFree(status);CHKERRQ(ierr);
4551 
4552   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4553   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4554 
4555   /* insert mat values of mpimat */
4556   /*----------------------------*/
4557   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4558   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4559 
4560   for (k=0; k<merge->nrecv; k++){
4561     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4562     nrows = *(buf_ri_k[k]);
4563     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4564     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4565   }
4566 
4567   /* set values of ba */
4568   m = merge->rowmap->n;
4569   for (i=0; i<m; i++) {
4570     arow = owners[rank] + i;
4571     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4572     bnzi = bi[i+1] - bi[i];
4573     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4574 
4575     /* add local non-zero vals of this proc's seqmat into ba */
4576     anzi = ai[arow+1] - ai[arow];
4577     aj   = a->j + ai[arow];
4578     aa   = a->a + ai[arow];
4579     nextaj = 0;
4580     for (j=0; nextaj<anzi; j++){
4581       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4582         ba_i[j] += aa[nextaj++];
4583       }
4584     }
4585 
4586     /* add received vals into ba */
4587     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4588       /* i-th row */
4589       if (i == *nextrow[k]) {
4590         anzi = *(nextai[k]+1) - *nextai[k];
4591         aj   = buf_rj[k] + *(nextai[k]);
4592         aa   = abuf_r[k] + *(nextai[k]);
4593         nextaj = 0;
4594         for (j=0; nextaj<anzi; j++){
4595           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4596             ba_i[j] += aa[nextaj++];
4597           }
4598         }
4599         nextrow[k]++; nextai[k]++;
4600       }
4601     }
4602     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4603   }
4604   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4605   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4606 
4607   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4608   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4609   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4610   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4611   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4612   PetscFunctionReturn(0);
4613 }
4614 
4615 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4616 
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4619 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4620 {
4621   PetscErrorCode       ierr;
4622   Mat                  B_mpi;
4623   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4624   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4625   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4626   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4627   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4628   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4629   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4630   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4631   MPI_Status           *status;
4632   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4633   PetscBT              lnkbt;
4634   Mat_Merge_SeqsToMPI  *merge;
4635   PetscContainer       container;
4636 
4637   PetscFunctionBegin;
4638   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4639 
4640   /* make sure it is a PETSc comm */
4641   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4642   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4643   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4644 
4645   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4646   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4647 
4648   /* determine row ownership */
4649   /*---------------------------------------------------------*/
4650   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4651   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4652   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4654   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4655   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4656   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4657 
4658   m      = merge->rowmap->n;
4659   M      = merge->rowmap->N;
4660   owners = merge->rowmap->range;
4661 
4662   /* determine the number of messages to send, their lengths */
4663   /*---------------------------------------------------------*/
4664   len_s  = merge->len_s;
4665 
4666   len = 0;  /* length of buf_si[] */
4667   merge->nsend = 0;
4668   for (proc=0; proc<size; proc++){
4669     len_si[proc] = 0;
4670     if (proc == rank){
4671       len_s[proc] = 0;
4672     } else {
4673       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4674       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4675     }
4676     if (len_s[proc]) {
4677       merge->nsend++;
4678       nrows = 0;
4679       for (i=owners[proc]; i<owners[proc+1]; i++){
4680         if (ai[i+1] > ai[i]) nrows++;
4681       }
4682       len_si[proc] = 2*(nrows+1);
4683       len += len_si[proc];
4684     }
4685   }
4686 
4687   /* determine the number and length of messages to receive for ij-structure */
4688   /*-------------------------------------------------------------------------*/
4689   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4690   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4691 
4692   /* post the Irecv of j-structure */
4693   /*-------------------------------*/
4694   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4695   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4696 
4697   /* post the Isend of j-structure */
4698   /*--------------------------------*/
4699   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4700 
4701   for (proc=0, k=0; proc<size; proc++){
4702     if (!len_s[proc]) continue;
4703     i = owners[proc];
4704     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4705     k++;
4706   }
4707 
4708   /* receives and sends of j-structure are complete */
4709   /*------------------------------------------------*/
4710   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4711   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4712 
4713   /* send and recv i-structure */
4714   /*---------------------------*/
4715   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4716   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4717 
4718   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4719   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4720   for (proc=0,k=0; proc<size; proc++){
4721     if (!len_s[proc]) continue;
4722     /* form outgoing message for i-structure:
4723          buf_si[0]:                 nrows to be sent
4724                [1:nrows]:           row index (global)
4725                [nrows+1:2*nrows+1]: i-structure index
4726     */
4727     /*-------------------------------------------*/
4728     nrows = len_si[proc]/2 - 1;
4729     buf_si_i    = buf_si + nrows+1;
4730     buf_si[0]   = nrows;
4731     buf_si_i[0] = 0;
4732     nrows = 0;
4733     for (i=owners[proc]; i<owners[proc+1]; i++){
4734       anzi = ai[i+1] - ai[i];
4735       if (anzi) {
4736         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4737         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4738         nrows++;
4739       }
4740     }
4741     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4742     k++;
4743     buf_si += len_si[proc];
4744   }
4745 
4746   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4747   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4748 
4749   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4750   for (i=0; i<merge->nrecv; i++){
4751     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4752   }
4753 
4754   ierr = PetscFree(len_si);CHKERRQ(ierr);
4755   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4756   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4757   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4758   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4759   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4760   ierr = PetscFree(status);CHKERRQ(ierr);
4761 
4762   /* compute a local seq matrix in each processor */
4763   /*----------------------------------------------*/
4764   /* allocate bi array and free space for accumulating nonzero column info */
4765   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4766   bi[0] = 0;
4767 
4768   /* create and initialize a linked list */
4769   nlnk = N+1;
4770   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4771 
4772   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4773   len = 0;
4774   len  = ai[owners[rank+1]] - ai[owners[rank]];
4775   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4776   current_space = free_space;
4777 
4778   /* determine symbolic info for each local row */
4779   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4780 
4781   for (k=0; k<merge->nrecv; k++){
4782     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4783     nrows = *buf_ri_k[k];
4784     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4785     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4786   }
4787 
4788   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4789   len = 0;
4790   for (i=0;i<m;i++) {
4791     bnzi   = 0;
4792     /* add local non-zero cols of this proc's seqmat into lnk */
4793     arow   = owners[rank] + i;
4794     anzi   = ai[arow+1] - ai[arow];
4795     aj     = a->j + ai[arow];
4796     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4797     bnzi += nlnk;
4798     /* add received col data into lnk */
4799     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4800       if (i == *nextrow[k]) { /* i-th row */
4801         anzi = *(nextai[k]+1) - *nextai[k];
4802         aj   = buf_rj[k] + *nextai[k];
4803         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4804         bnzi += nlnk;
4805         nextrow[k]++; nextai[k]++;
4806       }
4807     }
4808     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4809 
4810     /* if free space is not available, make more free space */
4811     if (current_space->local_remaining<bnzi) {
4812       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4813       nspacedouble++;
4814     }
4815     /* copy data into free space, then initialize lnk */
4816     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4817     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4818 
4819     current_space->array           += bnzi;
4820     current_space->local_used      += bnzi;
4821     current_space->local_remaining -= bnzi;
4822 
4823     bi[i+1] = bi[i] + bnzi;
4824   }
4825 
4826   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4827 
4828   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4829   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4830   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4831 
4832   /* create symbolic parallel matrix B_mpi */
4833   /*---------------------------------------*/
4834     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4835   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4836   if (n==PETSC_DECIDE) {
4837     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4838   } else {
4839     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4840   }
4841   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4842   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4843   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4844   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4845   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4846 
4847   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4848   B_mpi->assembled     = PETSC_FALSE;
4849   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4850   merge->bi            = bi;
4851   merge->bj            = bj;
4852   merge->buf_ri        = buf_ri;
4853   merge->buf_rj        = buf_rj;
4854   merge->coi           = PETSC_NULL;
4855   merge->coj           = PETSC_NULL;
4856   merge->owners_co     = PETSC_NULL;
4857 
4858   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4859 
4860   /* attach the supporting struct to B_mpi for reuse */
4861   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4862   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4863   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4864   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4865   *mpimat = B_mpi;
4866 
4867   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4868   PetscFunctionReturn(0);
4869 }
4870 
4871 #undef __FUNCT__
4872 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4873 /*@C
4874       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4875                  matrices from each processor
4876 
4877     Collective on MPI_Comm
4878 
4879    Input Parameters:
4880 +    comm - the communicators the parallel matrix will live on
4881 .    seqmat - the input sequential matrices
4882 .    m - number of local rows (or PETSC_DECIDE)
4883 .    n - number of local columns (or PETSC_DECIDE)
4884 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885 
4886    Output Parameter:
4887 .    mpimat - the parallel matrix generated
4888 
4889     Level: advanced
4890 
4891    Notes:
4892      The dimensions of the sequential matrix in each processor MUST be the same.
4893      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4894      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4895 @*/
4896 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4897 {
4898   PetscErrorCode   ierr;
4899   PetscMPIInt     size;
4900 
4901   PetscFunctionBegin;
4902   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4903   if (size == 1){
4904      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4905      if (scall == MAT_INITIAL_MATRIX){
4906        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4907      } else {
4908        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4909      }
4910      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911      PetscFunctionReturn(0);
4912   }
4913   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4914   if (scall == MAT_INITIAL_MATRIX){
4915     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4916   }
4917   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4918   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919   PetscFunctionReturn(0);
4920 }
4921 
4922 #undef __FUNCT__
4923 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4924 /*@
4925      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4926           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4927           with MatGetSize()
4928 
4929     Not Collective
4930 
4931    Input Parameters:
4932 +    A - the matrix
4933 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4934 
4935    Output Parameter:
4936 .    A_loc - the local sequential matrix generated
4937 
4938     Level: developer
4939 
4940 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4941 
4942 @*/
4943 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4944 {
4945   PetscErrorCode  ierr;
4946   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4947   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4948   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4949   MatScalar       *aa=a->a,*ba=b->a,*cam;
4950   PetscScalar     *ca;
4951   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4952   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4953   PetscBool       match;
4954 
4955   PetscFunctionBegin;
4956   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4957   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4958   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4959   if (scall == MAT_INITIAL_MATRIX){
4960     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4961     ci[0] = 0;
4962     for (i=0; i<am; i++){
4963       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4964     }
4965     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4966     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4967     k = 0;
4968     for (i=0; i<am; i++) {
4969       ncols_o = bi[i+1] - bi[i];
4970       ncols_d = ai[i+1] - ai[i];
4971       /* off-diagonal portion of A */
4972       for (jo=0; jo<ncols_o; jo++) {
4973         col = cmap[*bj];
4974         if (col >= cstart) break;
4975         cj[k]   = col; bj++;
4976         ca[k++] = *ba++;
4977       }
4978       /* diagonal portion of A */
4979       for (j=0; j<ncols_d; j++) {
4980         cj[k]   = cstart + *aj++;
4981         ca[k++] = *aa++;
4982       }
4983       /* off-diagonal portion of A */
4984       for (j=jo; j<ncols_o; j++) {
4985         cj[k]   = cmap[*bj++];
4986         ca[k++] = *ba++;
4987       }
4988     }
4989     /* put together the new matrix */
4990     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4991     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4992     /* Since these are PETSc arrays, change flags to free them as necessary. */
4993     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4994     mat->free_a  = PETSC_TRUE;
4995     mat->free_ij = PETSC_TRUE;
4996     mat->nonew   = 0;
4997   } else if (scall == MAT_REUSE_MATRIX){
4998     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4999     ci = mat->i; cj = mat->j; cam = mat->a;
5000     for (i=0; i<am; i++) {
5001       /* off-diagonal portion of A */
5002       ncols_o = bi[i+1] - bi[i];
5003       for (jo=0; jo<ncols_o; jo++) {
5004         col = cmap[*bj];
5005         if (col >= cstart) break;
5006         *cam++ = *ba++; bj++;
5007       }
5008       /* diagonal portion of A */
5009       ncols_d = ai[i+1] - ai[i];
5010       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5011       /* off-diagonal portion of A */
5012       for (j=jo; j<ncols_o; j++) {
5013         *cam++ = *ba++; bj++;
5014       }
5015     }
5016   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5017   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5018   PetscFunctionReturn(0);
5019 }
5020 
5021 #undef __FUNCT__
5022 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5023 /*@C
5024      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5025 
5026     Not Collective
5027 
5028    Input Parameters:
5029 +    A - the matrix
5030 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5031 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5032 
5033    Output Parameter:
5034 .    A_loc - the local sequential matrix generated
5035 
5036     Level: developer
5037 
5038 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5039 
5040 @*/
5041 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5042 {
5043   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5044   PetscErrorCode    ierr;
5045   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5046   IS                isrowa,iscola;
5047   Mat               *aloc;
5048   PetscBool       match;
5049 
5050   PetscFunctionBegin;
5051   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5052   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5053   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5054   if (!row){
5055     start = A->rmap->rstart; end = A->rmap->rend;
5056     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5057   } else {
5058     isrowa = *row;
5059   }
5060   if (!col){
5061     start = A->cmap->rstart;
5062     cmap  = a->garray;
5063     nzA   = a->A->cmap->n;
5064     nzB   = a->B->cmap->n;
5065     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5066     ncols = 0;
5067     for (i=0; i<nzB; i++) {
5068       if (cmap[i] < start) idx[ncols++] = cmap[i];
5069       else break;
5070     }
5071     imark = i;
5072     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5073     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5074     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5075   } else {
5076     iscola = *col;
5077   }
5078   if (scall != MAT_INITIAL_MATRIX){
5079     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5080     aloc[0] = *A_loc;
5081   }
5082   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5083   *A_loc = aloc[0];
5084   ierr = PetscFree(aloc);CHKERRQ(ierr);
5085   if (!row){
5086     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5087   }
5088   if (!col){
5089     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5090   }
5091   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5092   PetscFunctionReturn(0);
5093 }
5094 
5095 #undef __FUNCT__
5096 #define __FUNCT__ "MatGetBrowsOfAcols"
5097 /*@C
5098     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5099 
5100     Collective on Mat
5101 
5102    Input Parameters:
5103 +    A,B - the matrices in mpiaij format
5104 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5105 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5106 
5107    Output Parameter:
5108 +    rowb, colb - index sets of rows and columns of B to extract
5109 -    B_seq - the sequential matrix generated
5110 
5111     Level: developer
5112 
5113 @*/
5114 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5115 {
5116   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode    ierr;
5118   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5119   IS                isrowb,iscolb;
5120   Mat               *bseq=PETSC_NULL;
5121 
5122   PetscFunctionBegin;
5123   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5124     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5125   }
5126   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5127 
5128   if (scall == MAT_INITIAL_MATRIX){
5129     start = A->cmap->rstart;
5130     cmap  = a->garray;
5131     nzA   = a->A->cmap->n;
5132     nzB   = a->B->cmap->n;
5133     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5134     ncols = 0;
5135     for (i=0; i<nzB; i++) {  /* row < local row index */
5136       if (cmap[i] < start) idx[ncols++] = cmap[i];
5137       else break;
5138     }
5139     imark = i;
5140     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5141     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5142     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5143     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5144   } else {
5145     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5146     isrowb = *rowb; iscolb = *colb;
5147     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5148     bseq[0] = *B_seq;
5149   }
5150   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5151   *B_seq = bseq[0];
5152   ierr = PetscFree(bseq);CHKERRQ(ierr);
5153   if (!rowb){
5154     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5155   } else {
5156     *rowb = isrowb;
5157   }
5158   if (!colb){
5159     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5160   } else {
5161     *colb = iscolb;
5162   }
5163   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5164   PetscFunctionReturn(0);
5165 }
5166 
5167 #undef __FUNCT__
5168 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5169 /*
5170     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5171     of the OFF-DIAGONAL portion of local A
5172 
5173     Collective on Mat
5174 
5175    Input Parameters:
5176 +    A,B - the matrices in mpiaij format
5177 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5178 
5179    Output Parameter:
5180 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5181 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5182 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5183 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5184 
5185     Level: developer
5186 
5187 */
5188 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5189 {
5190   VecScatter_MPI_General *gen_to,*gen_from;
5191   PetscErrorCode         ierr;
5192   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5193   Mat_SeqAIJ             *b_oth;
5194   VecScatter             ctx=a->Mvctx;
5195   MPI_Comm               comm=((PetscObject)ctx)->comm;
5196   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5197   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5198   PetscScalar            *rvalues,*svalues;
5199   MatScalar              *b_otha,*bufa,*bufA;
5200   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5201   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5202   MPI_Status             *sstatus,rstatus;
5203   PetscMPIInt            jj;
5204   PetscInt               *cols,sbs,rbs;
5205   PetscScalar            *vals;
5206 
5207   PetscFunctionBegin;
5208   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5209     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5210   }
5211   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5212   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5213 
5214   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5215   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5216   rvalues  = gen_from->values; /* holds the length of receiving row */
5217   svalues  = gen_to->values;   /* holds the length of sending row */
5218   nrecvs   = gen_from->n;
5219   nsends   = gen_to->n;
5220 
5221   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5222   srow     = gen_to->indices;   /* local row index to be sent */
5223   sstarts  = gen_to->starts;
5224   sprocs   = gen_to->procs;
5225   sstatus  = gen_to->sstatus;
5226   sbs      = gen_to->bs;
5227   rstarts  = gen_from->starts;
5228   rprocs   = gen_from->procs;
5229   rbs      = gen_from->bs;
5230 
5231   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5232   if (scall == MAT_INITIAL_MATRIX){
5233     /* i-array */
5234     /*---------*/
5235     /*  post receives */
5236     for (i=0; i<nrecvs; i++){
5237       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5238       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5239       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5240     }
5241 
5242     /* pack the outgoing message */
5243     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5244     sstartsj[0] = 0;  rstartsj[0] = 0;
5245     len = 0; /* total length of j or a array to be sent */
5246     k = 0;
5247     for (i=0; i<nsends; i++){
5248       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5249       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5250       for (j=0; j<nrows; j++) {
5251         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5252         for (l=0; l<sbs; l++){
5253           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5254           rowlen[j*sbs+l] = ncols;
5255           len += ncols;
5256           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5257         }
5258         k++;
5259       }
5260       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5261       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5262     }
5263     /* recvs and sends of i-array are completed */
5264     i = nrecvs;
5265     while (i--) {
5266       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5267     }
5268     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5269 
5270     /* allocate buffers for sending j and a arrays */
5271     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5272     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5273 
5274     /* create i-array of B_oth */
5275     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5276     b_othi[0] = 0;
5277     len = 0; /* total length of j or a array to be received */
5278     k = 0;
5279     for (i=0; i<nrecvs; i++){
5280       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5281       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5282       for (j=0; j<nrows; j++) {
5283         b_othi[k+1] = b_othi[k] + rowlen[j];
5284         len += rowlen[j]; k++;
5285       }
5286       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5287     }
5288 
5289     /* allocate space for j and a arrrays of B_oth */
5290     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5291     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5292 
5293     /* j-array */
5294     /*---------*/
5295     /*  post receives of j-array */
5296     for (i=0; i<nrecvs; i++){
5297       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5298       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5299     }
5300 
5301     /* pack the outgoing message j-array */
5302     k = 0;
5303     for (i=0; i<nsends; i++){
5304       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5305       bufJ = bufj+sstartsj[i];
5306       for (j=0; j<nrows; j++) {
5307         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5308         for (ll=0; ll<sbs; ll++){
5309           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5310           for (l=0; l<ncols; l++){
5311             *bufJ++ = cols[l];
5312           }
5313           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5314         }
5315       }
5316       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5317     }
5318 
5319     /* recvs and sends of j-array are completed */
5320     i = nrecvs;
5321     while (i--) {
5322       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5323     }
5324     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5325   } else if (scall == MAT_REUSE_MATRIX){
5326     sstartsj = *startsj_s;
5327     rstartsj = *startsj_r;
5328     bufa     = *bufa_ptr;
5329     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5330     b_otha   = b_oth->a;
5331   } else {
5332     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5333   }
5334 
5335   /* a-array */
5336   /*---------*/
5337   /*  post receives of a-array */
5338   for (i=0; i<nrecvs; i++){
5339     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5340     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5341   }
5342 
5343   /* pack the outgoing message a-array */
5344   k = 0;
5345   for (i=0; i<nsends; i++){
5346     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5347     bufA = bufa+sstartsj[i];
5348     for (j=0; j<nrows; j++) {
5349       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5350       for (ll=0; ll<sbs; ll++){
5351         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5352         for (l=0; l<ncols; l++){
5353           *bufA++ = vals[l];
5354         }
5355         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5356       }
5357     }
5358     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5359   }
5360   /* recvs and sends of a-array are completed */
5361   i = nrecvs;
5362   while (i--) {
5363     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5364   }
5365   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5366   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5367 
5368   if (scall == MAT_INITIAL_MATRIX){
5369     /* put together the new matrix */
5370     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5371 
5372     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5373     /* Since these are PETSc arrays, change flags to free them as necessary. */
5374     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5375     b_oth->free_a  = PETSC_TRUE;
5376     b_oth->free_ij = PETSC_TRUE;
5377     b_oth->nonew   = 0;
5378 
5379     ierr = PetscFree(bufj);CHKERRQ(ierr);
5380     if (!startsj_s || !bufa_ptr){
5381       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5382       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5383     } else {
5384       *startsj_s = sstartsj;
5385       *startsj_r = rstartsj;
5386       *bufa_ptr  = bufa;
5387     }
5388   }
5389   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5390   PetscFunctionReturn(0);
5391 }
5392 
5393 #undef __FUNCT__
5394 #define __FUNCT__ "MatGetCommunicationStructs"
5395 /*@C
5396   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5397 
5398   Not Collective
5399 
5400   Input Parameters:
5401 . A - The matrix in mpiaij format
5402 
5403   Output Parameter:
5404 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5405 . colmap - A map from global column index to local index into lvec
5406 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5407 
5408   Level: developer
5409 
5410 @*/
5411 #if defined (PETSC_USE_CTABLE)
5412 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5413 #else
5414 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5415 #endif
5416 {
5417   Mat_MPIAIJ *a;
5418 
5419   PetscFunctionBegin;
5420   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5421   PetscValidPointer(lvec, 2);
5422   PetscValidPointer(colmap, 3);
5423   PetscValidPointer(multScatter, 4);
5424   a = (Mat_MPIAIJ *) A->data;
5425   if (lvec) *lvec = a->lvec;
5426   if (colmap) *colmap = a->colmap;
5427   if (multScatter) *multScatter = a->Mvctx;
5428   PetscFunctionReturn(0);
5429 }
5430 
5431 EXTERN_C_BEGIN
5432 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5433 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5434 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5435 EXTERN_C_END
5436 
5437 #undef __FUNCT__
5438 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5439 /*
5440     Computes (B'*A')' since computing B*A directly is untenable
5441 
5442                n                       p                          p
5443         (              )       (              )         (                  )
5444       m (      A       )  *  n (       B      )   =   m (         C        )
5445         (              )       (              )         (                  )
5446 
5447 */
5448 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5449 {
5450   PetscErrorCode     ierr;
5451   Mat                At,Bt,Ct;
5452 
5453   PetscFunctionBegin;
5454   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5455   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5456   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5457   ierr = MatDestroy(&At);CHKERRQ(ierr);
5458   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5459   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5460   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5461   PetscFunctionReturn(0);
5462 }
5463 
5464 #undef __FUNCT__
5465 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5466 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5467 {
5468   PetscErrorCode ierr;
5469   PetscInt       m=A->rmap->n,n=B->cmap->n;
5470   Mat            Cmat;
5471 
5472   PetscFunctionBegin;
5473   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5474   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5475   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5476   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5477   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5478   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5479   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5480   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481   *C   = Cmat;
5482   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 /* ----------------------------------------------------------------*/
5487 #undef __FUNCT__
5488 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5489 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5490 {
5491   PetscErrorCode ierr;
5492 
5493   PetscFunctionBegin;
5494   if (scall == MAT_INITIAL_MATRIX){
5495     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5496   }
5497   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5498   PetscFunctionReturn(0);
5499 }
5500 
5501 EXTERN_C_BEGIN
5502 #if defined(PETSC_HAVE_MUMPS)
5503 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5504 #endif
5505 #if defined(PETSC_HAVE_PASTIX)
5506 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5507 #endif
5508 #if defined(PETSC_HAVE_SUPERLU_DIST)
5509 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5510 #endif
5511 #if defined(PETSC_HAVE_SPOOLES)
5512 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5513 #endif
5514 EXTERN_C_END
5515 
5516 /*MC
5517    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5518 
5519    Options Database Keys:
5520 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5521 
5522   Level: beginner
5523 
5524 .seealso: MatCreateAIJ()
5525 M*/
5526 
5527 EXTERN_C_BEGIN
5528 #undef __FUNCT__
5529 #define __FUNCT__ "MatCreate_MPIAIJ"
5530 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5531 {
5532   Mat_MPIAIJ     *b;
5533   PetscErrorCode ierr;
5534   PetscMPIInt    size;
5535 
5536   PetscFunctionBegin;
5537   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5538   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5539   B->data         = (void*)b;
5540   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5541   B->assembled    = PETSC_FALSE;
5542   B->insertmode   = NOT_SET_VALUES;
5543   b->size         = size;
5544   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5545 
5546   /* build cache for off array entries formed */
5547   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5548   b->donotstash  = PETSC_FALSE;
5549   b->colmap      = 0;
5550   b->garray      = 0;
5551   b->roworiented = PETSC_TRUE;
5552 
5553   /* stuff used for matrix vector multiply */
5554   b->lvec      = PETSC_NULL;
5555   b->Mvctx     = PETSC_NULL;
5556 
5557   /* stuff for MatGetRow() */
5558   b->rowindices   = 0;
5559   b->rowvalues    = 0;
5560   b->getrowactive = PETSC_FALSE;
5561 
5562   /* flexible pointer used in CUSP/CUSPARSE classes */
5563   b->spptr        = PETSC_NULL;
5564 #if defined(PETSC_HAVE_SPOOLES)
5565   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5566                                      "MatGetFactor_mpiaij_spooles",
5567                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5568 #endif
5569 #if defined(PETSC_HAVE_MUMPS)
5570   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5571                                      "MatGetFactor_aij_mumps",
5572                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5573 #endif
5574 #if defined(PETSC_HAVE_PASTIX)
5575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5576 					   "MatGetFactor_mpiaij_pastix",
5577 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5578 #endif
5579 #if defined(PETSC_HAVE_SUPERLU_DIST)
5580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5581                                      "MatGetFactor_mpiaij_superlu_dist",
5582                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5583 #endif
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5585                                      "MatStoreValues_MPIAIJ",
5586                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5588                                      "MatRetrieveValues_MPIAIJ",
5589                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5591 				     "MatGetDiagonalBlock_MPIAIJ",
5592                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5594 				     "MatIsTranspose_MPIAIJ",
5595 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5597 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5598 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5600 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5601 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5603 				     "MatDiagonalScaleLocal_MPIAIJ",
5604 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5606                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5607                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5609                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5610                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5612                                      "MatConvert_MPIAIJ_MPISBAIJ",
5613                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5615                                      "MatMatMult_MPIDense_MPIAIJ",
5616                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5618                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5619                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5621                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5622                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5624   PetscFunctionReturn(0);
5625 }
5626 EXTERN_C_END
5627 
5628 #undef __FUNCT__
5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5630 /*@
5631      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5632          and "off-diagonal" part of the matrix in CSR format.
5633 
5634    Collective on MPI_Comm
5635 
5636    Input Parameters:
5637 +  comm - MPI communicator
5638 .  m - number of local rows (Cannot be PETSC_DECIDE)
5639 .  n - This value should be the same as the local size used in creating the
5640        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5641        calculated if N is given) For square matrices n is almost always m.
5642 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5643 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5644 .   i - row indices for "diagonal" portion of matrix
5645 .   j - column indices
5646 .   a - matrix values
5647 .   oi - row indices for "off-diagonal" portion of matrix
5648 .   oj - column indices
5649 -   oa - matrix values
5650 
5651    Output Parameter:
5652 .   mat - the matrix
5653 
5654    Level: advanced
5655 
5656    Notes:
5657        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5658        must free the arrays once the matrix has been destroyed and not before.
5659 
5660        The i and j indices are 0 based
5661 
5662        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5663 
5664        This sets local rows and cannot be used to set off-processor values.
5665 
5666        You cannot later use MatSetValues() to change values in this matrix.
5667 
5668 .keywords: matrix, aij, compressed row, sparse, parallel
5669 
5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5671           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5672 @*/
5673 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5674 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5675 {
5676   PetscErrorCode ierr;
5677   Mat_MPIAIJ     *maij;
5678 
5679  PetscFunctionBegin;
5680   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5681   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5682   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5683   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5684   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5685   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5686   maij = (Mat_MPIAIJ*) (*mat)->data;
5687   maij->donotstash     = PETSC_TRUE;
5688   (*mat)->preallocated = PETSC_TRUE;
5689 
5690   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5691   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5692 
5693   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5694   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5695 
5696   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700 
5701   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   PetscFunctionReturn(0);
5704 }
5705 
5706 /*
5707     Special version for direct calls from Fortran
5708 */
5709 #include <petsc-private/fortranimpl.h>
5710 
5711 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5715 #endif
5716 
5717 /* Change these macros so can be used in void function */
5718 #undef CHKERRQ
5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5720 #undef SETERRQ2
5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5722 #undef SETERRQ3
5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5724 #undef SETERRQ
5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5726 
5727 EXTERN_C_BEGIN
5728 #undef __FUNCT__
5729 #define __FUNCT__ "matsetvaluesmpiaij_"
5730 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5731 {
5732   Mat             mat = *mmat;
5733   PetscInt        m = *mm, n = *mn;
5734   InsertMode      addv = *maddv;
5735   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5736   PetscScalar     value;
5737   PetscErrorCode  ierr;
5738 
5739   MatCheckPreallocated(mat,1);
5740   if (mat->insertmode == NOT_SET_VALUES) {
5741     mat->insertmode = addv;
5742   }
5743 #if defined(PETSC_USE_DEBUG)
5744   else if (mat->insertmode != addv) {
5745     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5746   }
5747 #endif
5748   {
5749   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5750   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5751   PetscBool       roworiented = aij->roworiented;
5752 
5753   /* Some Variables required in the macro */
5754   Mat             A = aij->A;
5755   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5756   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5757   MatScalar       *aa = a->a;
5758   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5759   Mat             B = aij->B;
5760   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5761   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5762   MatScalar       *ba = b->a;
5763 
5764   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5765   PetscInt        nonew = a->nonew;
5766   MatScalar       *ap1,*ap2;
5767 
5768   PetscFunctionBegin;
5769   for (i=0; i<m; i++) {
5770     if (im[i] < 0) continue;
5771 #if defined(PETSC_USE_DEBUG)
5772     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5773 #endif
5774     if (im[i] >= rstart && im[i] < rend) {
5775       row      = im[i] - rstart;
5776       lastcol1 = -1;
5777       rp1      = aj + ai[row];
5778       ap1      = aa + ai[row];
5779       rmax1    = aimax[row];
5780       nrow1    = ailen[row];
5781       low1     = 0;
5782       high1    = nrow1;
5783       lastcol2 = -1;
5784       rp2      = bj + bi[row];
5785       ap2      = ba + bi[row];
5786       rmax2    = bimax[row];
5787       nrow2    = bilen[row];
5788       low2     = 0;
5789       high2    = nrow2;
5790 
5791       for (j=0; j<n; j++) {
5792         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5793         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5794         if (in[j] >= cstart && in[j] < cend){
5795           col = in[j] - cstart;
5796           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5797         } else if (in[j] < 0) continue;
5798 #if defined(PETSC_USE_DEBUG)
5799         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5800 #endif
5801         else {
5802           if (mat->was_assembled) {
5803             if (!aij->colmap) {
5804               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5805             }
5806 #if defined (PETSC_USE_CTABLE)
5807             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5808 	    col--;
5809 #else
5810             col = aij->colmap[in[j]] - 1;
5811 #endif
5812             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5813               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5814               col =  in[j];
5815               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5816               B = aij->B;
5817               b = (Mat_SeqAIJ*)B->data;
5818               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5819               rp2      = bj + bi[row];
5820               ap2      = ba + bi[row];
5821               rmax2    = bimax[row];
5822               nrow2    = bilen[row];
5823               low2     = 0;
5824               high2    = nrow2;
5825               bm       = aij->B->rmap->n;
5826               ba = b->a;
5827             }
5828           } else col = in[j];
5829           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5830         }
5831       }
5832     } else {
5833       if (!aij->donotstash) {
5834         if (roworiented) {
5835           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5836         } else {
5837           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5838         }
5839       }
5840     }
5841   }}
5842   PetscFunctionReturnVoid();
5843 }
5844 EXTERN_C_END
5845 
5846