xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8cbdbec6f8317ddf7886f91eb9c6bd083b543c50)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
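/*
   Usage sketch (illustrative, not part of this file): the MATAIJ documentation above recommends
   calling both preallocation routines so that the same code works with one or with many processes.
   A minimal sketch of that pattern; the per-row counts 5 and 2 are made-up illustration values,
   and comm, m, n are assumed to be provided by the caller.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);                  used when comm has a single process
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);     used when comm has several processes
*/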
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
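/*
   Usage sketch (illustrative): the aijcrl format is normally selected at runtime through the options
   database, so the assembly code does not change. A minimal sketch, assuming a matrix A has been
   created and preallocated as in the MATAIJ example above; "myapp" is a placeholder executable name.

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);        honors -mat_type aijcrl when it is given
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   run as:  mpiexec -n 4 ./myapp -mat_type aijcrl
*/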
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
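/*
   Call sketch (illustrative, not from this file): gmat is a SeqAIJ matrix that is only dereferenced
   on rank 0, every rank passes its local row count, and each rank receives its share in inmat. On
   reuse only the numerical values are moved. The names mlocal and dist are assumptions for the sketch.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, after changing only the numerical values of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/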
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0 */
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but is fast to access.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal allocating
865      new memory, thus calling lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: symmetric diagonal block */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This uses MatGetSubMatrices(). */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
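/*
   Usage sketch (illustrative only, not part of this source file): the binary path above is what
   runs when an assembled MPIAIJ matrix is saved with a binary viewer.  The matrix A and the file
   name are assumptions for the example.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);           <-- dispatches to MatView_MPIAIJ_Binary() when size > 1
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
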
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscInt   *inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
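/*
   Usage sketch (illustrative only): the ASCII "info" branch above can be reached either in code or
   from the options database (e.g. a runtime option such as -mat_view_info in this release).  The
   matrix A is assumed to be an assembled MATMPIAIJ.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
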
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* bb1 is needed when its > 1, when SOR_ZERO_INITIAL_GUESS is NOT set (~flag & ...), or for Eisenstat */
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
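/*
   Usage sketch (illustrative only, not part of this source): MatSOR_MPIAIJ() supports only the
   processor-local (SOR_LOCAL_*) and Eisenstat variants; it is normally reached through PCSOR.
   The ksp, pc, A, b, x variables below are assumptions for the example.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or directly:

     ierr = MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,its,lits,x);CHKERRQ(ierr);
*/
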
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1743   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1744   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745 
1746   /* Invert column permutation to find out where my columns should go */
1747   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1749   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1750   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1751   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1752   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1753 
1754   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1755   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1756   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1757 
1758   /* Find out where my gcols should go */
1759   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1760   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1761   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1762   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766 
1767   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772   for (i=0; i<m; i++) {
1773     PetscInt row = rdest[i],rowner;
1774     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775     for (j=ai[i]; j<ai[i+1]; j++) {
1776       PetscInt cowner,col = cdest[aj[j]];
1777       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778       if (rowner == cowner) dnnz[i]++;
1779       else onnz[i]++;
1780     }
1781     for (j=bi[i]; j<bi[i+1]; j++) {
1782       PetscInt cowner,col = gcdest[bj[j]];
1783       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784       if (rowner == cowner) dnnz[i]++;
1785       else onnz[i]++;
1786     }
1787   }
1788   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795 
1796   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799   for (i=0; i<m; i++) {
1800     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801     PetscInt rowlen;
1802     rowlen = ai[i+1] - ai[i];
1803     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805     rowlen = bi[i+1] - bi[i];
1806     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808   }
1809   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819   *B = Aperm;
1820   PetscFunctionReturn(0);
1821 }
1822 
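/*
   Usage sketch (illustrative only): MatPermute() builds a new parallel matrix with rows and
   columns permuted; rowp and colp are index sets describing the desired row and column
   permutation (see the MatPermute() man page for their exact meaning), distributed like A.

     IS  rowp,colp;
     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
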
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1828   Mat            A = mat->A,B = mat->B;
1829   PetscErrorCode ierr;
1830   PetscReal      isend[5],irecv[5];
1831 
1832   PetscFunctionBegin;
1833   info->block_size     = 1.0;
1834   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1835   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1836   isend[3] = info->memory;  isend[4] = info->mallocs;
1837   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1839   isend[3] += info->memory;  isend[4] += info->mallocs;
1840   if (flag == MAT_LOCAL) {
1841     info->nz_used      = isend[0];
1842     info->nz_allocated = isend[1];
1843     info->nz_unneeded  = isend[2];
1844     info->memory       = isend[3];
1845     info->mallocs      = isend[4];
1846   } else if (flag == MAT_GLOBAL_MAX) {
1847     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1848     info->nz_used      = irecv[0];
1849     info->nz_allocated = irecv[1];
1850     info->nz_unneeded  = irecv[2];
1851     info->memory       = irecv[3];
1852     info->mallocs      = irecv[4];
1853   } else if (flag == MAT_GLOBAL_SUM) {
1854     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1855     info->nz_used      = irecv[0];
1856     info->nz_allocated = irecv[1];
1857     info->nz_unneeded  = irecv[2];
1858     info->memory       = irecv[3];
1859     info->mallocs      = irecv[4];
1860   }
1861   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1862   info->fill_ratio_needed = 0;
1863   info->factor_mallocs    = 0;
1864   PetscFunctionReturn(0);
1865 }
1866 
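/*
   Usage sketch (illustrative only): querying global nonzero counts, which exercises the
   MPI_Allreduce() branches above.  The matrix A is an assumption for the example.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
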
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetOption_MPIAIJ"
1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1870 {
1871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   switch (op) {
1876   case MAT_NEW_NONZERO_LOCATIONS:
1877   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1878   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1879   case MAT_KEEP_NONZERO_PATTERN:
1880   case MAT_NEW_NONZERO_LOCATION_ERR:
1881   case MAT_USE_INODES:
1882   case MAT_IGNORE_ZERO_ENTRIES:
1883     MatCheckPreallocated(A,1);
1884     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1885     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1886     break;
1887   case MAT_ROW_ORIENTED:
1888     a->roworiented = flg;
1889     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1890     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1891     break;
1892   case MAT_NEW_DIAGONALS:
1893     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1894     break;
1895   case MAT_IGNORE_OFF_PROC_ENTRIES:
1896     a->donotstash = flg;
1897     break;
1898   case MAT_SPD:
1899     A->spd_set                         = PETSC_TRUE;
1900     A->spd                             = flg;
1901     if (flg) {
1902       A->symmetric                     = PETSC_TRUE;
1903       A->structurally_symmetric        = PETSC_TRUE;
1904       A->symmetric_set                 = PETSC_TRUE;
1905       A->structurally_symmetric_set    = PETSC_TRUE;
1906     }
1907     break;
1908   case MAT_SYMMETRIC:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   case MAT_STRUCTURALLY_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_HERMITIAN:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_SYMMETRY_ETERNAL:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   default:
1921     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
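/*
   Usage sketch (illustrative only): most options are simply forwarded to the diagonal (A) and
   off-diagonal (B) sequential blocks; a few only set flags on the parallel object.  On a matrix
   A known to be SPD, for example:

     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                      <-- also marks it (structurally) symmetric
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);  <-- only sets the donotstash flag
*/
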
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetRow_MPIAIJ"
1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1929 {
1930   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1931   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1932   PetscErrorCode ierr;
1933   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1934   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1935   PetscInt       *cmap,*idx_p;
1936 
1937   PetscFunctionBegin;
1938   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1939   mat->getrowactive = PETSC_TRUE;
1940 
1941   if (!mat->rowvalues && (idx || v)) {
1942     /*
1943         allocate enough space to hold information from the longest row.
1944     */
1945     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1946     PetscInt   max = 1,tmp;
1947     for (i=0; i<matin->rmap->n; i++) {
1948       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1949       if (max < tmp) { max = tmp; }
1950     }
1951     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1952   }
1953 
1954   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1955   lrow = row - rstart;
1956 
1957   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1958   if (!v)   {pvA = 0; pvB = 0;}
1959   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1960   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1961   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1962   nztot = nzA + nzB;
1963 
1964   cmap  = mat->garray;
1965   if (v  || idx) {
1966     if (nztot) {
1967       /* Sort by increasing column numbers, assuming A and B already sorted */
1968       PetscInt imark = -1;
1969       if (v) {
1970         *v = v_p = mat->rowvalues;
1971         for (i=0; i<nzB; i++) {
1972           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1973           else break;
1974         }
1975         imark = i;
1976         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1977         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1978       }
1979       if (idx) {
1980         *idx = idx_p = mat->rowindices;
1981         if (imark > -1) {
1982           for (i=0; i<imark; i++) {
1983             idx_p[i] = cmap[cworkB[i]];
1984           }
1985         } else {
1986           for (i=0; i<nzB; i++) {
1987             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1988             else break;
1989           }
1990           imark = i;
1991         }
1992         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1993         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1994       }
1995     } else {
1996       if (idx) *idx = 0;
1997       if (v)   *v   = 0;
1998     }
1999   }
2000   *nz = nztot;
2001   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2002   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2009 {
2010   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2011 
2012   PetscFunctionBegin;
2013   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2014   aij->getrowactive = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
2017 
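/*
   Usage sketch (illustrative only): MatGetRow()/MatRestoreRow() may only be called for locally
   owned rows of an MPIAIJ matrix, and only one row may be active at a time (see getrowactive
   above).  The matrix A is an assumption for the example.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[], vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
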
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatNorm_MPIAIJ"
2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2024   PetscErrorCode ierr;
2025   PetscInt       i,j,cstart = mat->cmap->rstart;
2026   PetscReal      sum = 0.0;
2027   MatScalar      *v;
2028 
2029   PetscFunctionBegin;
2030   if (aij->size == 1) {
2031     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2032   } else {
2033     if (type == NORM_FROBENIUS) {
2034       v = amat->a;
2035       for (i=0; i<amat->nz; i++) {
2036 #if defined(PETSC_USE_COMPLEX)
2037         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2038 #else
2039         sum += (*v)*(*v); v++;
2040 #endif
2041       }
2042       v = bmat->a;
2043       for (i=0; i<bmat->nz; i++) {
2044 #if defined(PETSC_USE_COMPLEX)
2045         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2046 #else
2047         sum += (*v)*(*v); v++;
2048 #endif
2049       }
2050       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2051       *norm = PetscSqrtReal(*norm);
2052     } else if (type == NORM_1) { /* max column norm */
2053       PetscReal *tmp,*tmp2;
2054       PetscInt  *jj,*garray = aij->garray;
2055       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2056       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2057       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2058       *norm = 0.0;
2059       v = amat->a; jj = amat->j;
2060       for (j=0; j<amat->nz; j++) {
2061         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2062       }
2063       v = bmat->a; jj = bmat->j;
2064       for (j=0; j<bmat->nz; j++) {
2065         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2066       }
2067       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068       for (j=0; j<mat->cmap->N; j++) {
2069         if (tmp2[j] > *norm) *norm = tmp2[j];
2070       }
2071       ierr = PetscFree(tmp);CHKERRQ(ierr);
2072       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2073     } else if (type == NORM_INFINITY) { /* max row norm */
2074       PetscReal ntemp = 0.0;
2075       for (j=0; j<aij->A->rmap->n; j++) {
2076         v = amat->a + amat->i[j];
2077         sum = 0.0;
2078         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2079           sum += PetscAbsScalar(*v); v++;
2080         }
2081         v = bmat->a + bmat->i[j];
2082         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2083           sum += PetscAbsScalar(*v); v++;
2084         }
2085         if (sum > ntemp) ntemp = sum;
2086       }
2087       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2088     } else {
2089       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2090     }
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
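/*
   Usage sketch (illustrative only): only NORM_1, NORM_FROBENIUS and NORM_INFINITY are supported
   in parallel, matching the branches above.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);   <-- maximum row sum
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);          <-- maximum column sum
*/
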
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatTranspose_MPIAIJ"
2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2101   PetscErrorCode ierr;
2102   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2103   PetscInt       cstart=A->cmap->rstart,ncol;
2104   Mat            B;
2105   MatScalar      *array;
2106 
2107   PetscFunctionBegin;
2108   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2109 
2110   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2111   ai = Aloc->i; aj = Aloc->j;
2112   bi = Bloc->i; bj = Bloc->j;
2113   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2114     PetscInt *d_nnz,*g_nnz,*o_nnz;
2115     PetscSFNode *oloc;
2116     PETSC_UNUSED PetscSF sf;
2117 
2118     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2119     /* compute d_nnz for preallocation */
2120     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<ai[ma]; i++) {
2122       d_nnz[aj[i]] ++;
2123       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124     }
2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */
2126     ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2127 #else
2128     /* compute local off-diagonal contributions */
2129     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2131     /* map those to global */
2132     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2133     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2134     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2135     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2136     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2137     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2138 #endif
2139 
2140     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2141     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2142     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2143     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2146 #if defined(OMPI_MAJOR_VERSION)
2147     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2148 #endif
2149   } else {
2150     B = *matout;
2151     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2152     for (i=0; i<ai[ma]; i++){
2153       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2154     }
2155   }
2156 
2157   /* copy over the A part */
2158   array = Aloc->a;
2159   row = A->rmap->rstart;
2160   for (i=0; i<ma; i++) {
2161     ncol = ai[i+1]-ai[i];
2162     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2163     row++; array += ncol; aj += ncol;
2164   }
2165   aj = Aloc->j;
2166   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2167 
2168   /* copy over the B part */
2169   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2170   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2171   array = Bloc->a;
2172   row = A->rmap->rstart;
2173   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2174   cols_tmp = cols;
2175   for (i=0; i<mb; i++) {
2176     ncol = bi[i+1]-bi[i];
2177     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2178     row++; array += ncol; cols_tmp += ncol;
2179   }
2180   ierr = PetscFree(cols);CHKERRQ(ierr);
2181 
2182   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2185     *matout = B;
2186   } else {
2187     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
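/*
   Usage sketch (illustrative only): creating an explicit parallel transpose, or transposing in
   place (square matrices only, per the size check above).

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    <-- in place, requires M == N
*/
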
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2195 {
2196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2197   Mat            a = aij->A,b = aij->B;
2198   PetscErrorCode ierr;
2199   PetscInt       s1,s2,s3;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2203   if (rr) {
2204     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2205     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2206     /* Overlap communication with computation. */
2207     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2208   }
2209   if (ll) {
2210     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2211     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2212     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2213   }
2214   /* scale  the diagonal block */
2215   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2216 
2217   if (rr) {
2218     /* Do a scatter end and then right scale the off-diagonal block */
2219     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2220     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2221   }
2222 
2223   PetscFunctionReturn(0);
2224 }
2225 
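/*
   Usage sketch (illustrative only): A is overwritten by diag(l) A diag(r), where l is distributed
   like the rows of A and r like its columns; either vector may be PETSC_NULL to skip that side.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);    <-- right scaling only
*/
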
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2229 {
2230   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatEqual_MPIAIJ"
2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2241 {
2242   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2243   Mat            a,b,c,d;
2244   PetscBool      flg;
2245   PetscErrorCode ierr;
2246 
2247   PetscFunctionBegin;
2248   a = matA->A; b = matA->B;
2249   c = matB->A; d = matB->B;
2250 
2251   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2252   if (flg) {
2253     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2254   }
2255   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatCopy_MPIAIJ"
2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2262 {
2263   PetscErrorCode ierr;
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2265   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2266 
2267   PetscFunctionBegin;
2268   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2269   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2270     /* because of the column compression in the off-processor part of the matrix a->B,
2271        the number of columns in a->B and b->B may be different, hence we cannot call
2272        the MatCopy() directly on the two parts. If need be, we can provide a more
2273        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2274        then copying the submatrices */
2275     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2276   } else {
2277     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2278     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2279   }
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatSetUp_MPIAIJ"
2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2286 {
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2298 {
2299   PetscInt          i,m=Y->rmap->N;
2300   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2301   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2302   const PetscInt    *xi = x->i,*yi = y->i;
2303 
2304   PetscFunctionBegin;
2305   /* Set the number of nonzeros in the new matrix */
2306   for (i=0; i<m; i++) {
2307     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2308     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2309     nnz[i] = 0;
2310     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2311       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2312       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2313       nnz[i]++;
2314     }
2315     for (; k<nzy; k++) nnz[i]++;
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatAXPY_MPIAIJ"
2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2323 {
2324   PetscErrorCode ierr;
2325   PetscInt       i;
2326   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2327   PetscBLASInt   bnz,one=1;
2328   Mat_SeqAIJ     *x,*y;
2329 
2330   PetscFunctionBegin;
2331   if (str == SAME_NONZERO_PATTERN) {
2332     PetscScalar alpha = a;
2333     x = (Mat_SeqAIJ *)xx->A->data;
2334     y = (Mat_SeqAIJ *)yy->A->data;
2335     bnz = PetscBLASIntCast(x->nz);
2336     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2337     x = (Mat_SeqAIJ *)xx->B->data;
2338     y = (Mat_SeqAIJ *)yy->B->data;
2339     bnz = PetscBLASIntCast(x->nz);
2340     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2341   } else if (str == SUBSET_NONZERO_PATTERN) {
2342     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2343 
2344     x = (Mat_SeqAIJ *)xx->B->data;
2345     y = (Mat_SeqAIJ *)yy->B->data;
2346     if (y->xtoy && y->XtoY != xx->B) {
2347       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2348       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2349     }
2350     if (!y->xtoy) { /* get xtoy */
2351       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2352       y->XtoY = xx->B;
2353       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2354     }
2355     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2356   } else {
2357     Mat B;
2358     PetscInt *nnz_d,*nnz_o;
2359     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2360     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2361     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2362     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2363     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2364     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2365     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2366     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2367     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2368     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2369     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2370     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2371     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2372     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2373   }
2374   PetscFunctionReturn(0);
2375 }
2376 
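/*
   Usage sketch (illustrative only): Y = Y + a*X.  The SAME_NONZERO_PATTERN case above reduces to
   two BLAS axpy calls on the stored values; otherwise new preallocation is computed as shown.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
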
2377 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatConjugate_MPIAIJ"
2381 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2382 {
2383 #if defined(PETSC_USE_COMPLEX)
2384   PetscErrorCode ierr;
2385   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2389   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2390 #else
2391   PetscFunctionBegin;
2392 #endif
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatRealPart_MPIAIJ"
2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2399 {
2400   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2405   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2412 {
2413   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2418   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 #if defined(PETSC_HAVE_PBGL)
2423 
2424 #include <boost/parallel/mpi/bsp_process_group.hpp>
2425 #include <boost/graph/distributed/ilu_default_graph.hpp>
2426 #include <boost/graph/distributed/ilu_0_block.hpp>
2427 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2428 #include <boost/graph/distributed/petsc/interface.hpp>
2429 #include <boost/multi_array.hpp>
2430 #include <boost/parallel/distributed_property_map.hpp>
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2434 /*
2435   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2436 */
2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2438 {
2439   namespace petsc = boost::distributed::petsc;
2440 
2441   namespace graph_dist = boost::graph::distributed;
2442   using boost::graph::distributed::ilu_default::process_group_type;
2443   using boost::graph::ilu_permuted;
2444 
2445   PetscBool       row_identity, col_identity;
2446   PetscContainer  c;
2447   PetscInt        m, n, M, N;
2448   PetscErrorCode  ierr;
2449 
2450   PetscFunctionBegin;
2451   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2452   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2453   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2454   if (!row_identity || !col_identity) {
2455     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2456   }
2457 
2458   process_group_type pg;
2459   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2460   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2461   lgraph_type&   level_graph = *lgraph_p;
2462   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2463 
2464   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2465   ilu_permuted(level_graph);
2466 
2467   /* put together the new matrix */
2468   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2469   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2470   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2471   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2472   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2473   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2474   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2475   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476 
2477   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2478   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2479   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2480   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2487 {
2488   PetscFunctionBegin;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatSolve_MPIAIJ"
2494 /*
2495   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2496 */
2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2498 {
2499   namespace graph_dist = boost::graph::distributed;
2500 
2501   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2502   lgraph_type*   lgraph_p;
2503   PetscContainer c;
2504   PetscErrorCode ierr;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2508   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2509   ierr = VecCopy(b, x);CHKERRQ(ierr);
2510 
2511   PetscScalar* array_x;
2512   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2513   PetscInt sx;
2514   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2515 
2516   PetscScalar* array_b;
2517   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2518   PetscInt sb;
2519   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2520 
2521   lgraph_type&   level_graph = *lgraph_p;
2522   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2523 
2524   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2525   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2526                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2527 
2528   typedef boost::iterator_property_map<array_ref_type::iterator,
2529                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2530   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2531                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2532 
2533   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2534 
2535   PetscFunctionReturn(0);
2536 }
2537 #endif
2538 
2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2540   PetscInt       nzlocal,nsends,nrecvs;
2541   PetscMPIInt    *send_rank,*recv_rank;
2542   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2543   PetscScalar    *sbuf_a,**rbuf_a;
2544   PetscErrorCode (*Destroy)(Mat);
2545 } Mat_Redundant;
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2550 {
2551   PetscErrorCode       ierr;
2552   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2553   PetscInt             i;
2554 
2555   PetscFunctionBegin;
2556   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2557   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2558   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2559   for (i=0; i<redund->nrecvs; i++){
2560     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2561     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2562   }
2563   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2564   ierr = PetscFree(redund);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatDestroy_MatRedundant"
2570 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2571 {
2572   PetscErrorCode  ierr;
2573   PetscContainer  container;
2574   Mat_Redundant   *redund=PETSC_NULL;
2575 
2576   PetscFunctionBegin;
2577   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2578   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2579   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2580   A->ops->destroy = redund->Destroy;
2581   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2582   if (A->ops->destroy) {
2583     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2584   }
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2591 {
2592   PetscMPIInt    rank,size;
2593   MPI_Comm       comm=((PetscObject)mat)->comm;
2594   PetscErrorCode ierr;
2595   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2596   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2597   PetscInt       *rowrange=mat->rmap->range;
2598   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2599   Mat            A=aij->A,B=aij->B,C=*matredundant;
2600   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2601   PetscScalar    *sbuf_a;
2602   PetscInt       nzlocal=a->nz+b->nz;
2603   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2604   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2605   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2606   MatScalar      *aworkA,*aworkB;
2607   PetscScalar    *vals;
2608   PetscMPIInt    tag1,tag2,tag3,imdex;
2609   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2610                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2611   MPI_Status     recv_status,*send_status;
2612   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2613   PetscInt       **rbuf_j=PETSC_NULL;
2614   PetscScalar    **rbuf_a=PETSC_NULL;
2615   Mat_Redundant  *redund=PETSC_NULL;
2616   PetscContainer container;
2617 
2618   PetscFunctionBegin;
2619   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621 
2622   if (reuse == MAT_REUSE_MATRIX) {
2623     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2624     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2625     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2626     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2627     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2628     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2629     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2630     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2631 
2632     nsends    = redund->nsends;
2633     nrecvs    = redund->nrecvs;
2634     send_rank = redund->send_rank;
2635     recv_rank = redund->recv_rank;
2636     sbuf_nz   = redund->sbuf_nz;
2637     rbuf_nz   = redund->rbuf_nz;
2638     sbuf_j    = redund->sbuf_j;
2639     sbuf_a    = redund->sbuf_a;
2640     rbuf_j    = redund->rbuf_j;
2641     rbuf_a    = redund->rbuf_a;
2642   }
2643 
2644   if (reuse == MAT_INITIAL_MATRIX){
2645     PetscMPIInt  subrank,subsize;
2646     PetscInt     nleftover,np_subcomm;
2647     /* get the destination processors' id send_rank, nsends and nrecvs */
2648     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2649     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2650     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2651     np_subcomm = size/nsubcomm;
2652     nleftover  = size - nsubcomm*np_subcomm;
2653     nsends = 0; nrecvs = 0;
2654     for (i=0; i<size; i++){ /* i=rank*/
2655       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2656         send_rank[nsends] = i; nsends++;
2657         recv_rank[nrecvs++] = i;
2658       }
2659     }
2660     if (rank >= size - nleftover){/* this proc is a leftover processor */
2661       i = size-nleftover-1;
2662       j = 0;
2663       while (j < nsubcomm - nleftover){
2664         send_rank[nsends++] = i;
2665         i--; j++;
2666       }
2667     }
2668 
2669     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2670       for (i=0; i<nleftover; i++){
2671         recv_rank[nrecvs++] = size-nleftover+i;
2672       }
2673     }
2674 
2675     /* allocate sbuf_j, sbuf_a */
2676     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2677     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2678     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2679   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2680 
2681   /* copy mat's local entries into the buffers */
2682   if (reuse == MAT_INITIAL_MATRIX){
2683     rownz_max = 0;
2684     rptr = sbuf_j;
2685     cols = sbuf_j + rend-rstart + 1;
2686     vals = sbuf_a;
2687     rptr[0] = 0;
2688     for (i=0; i<rend-rstart; i++){
2689       row = i + rstart;
2690       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2691       ncols  = nzA + nzB;
2692       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2693       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2694       /* load the column indices for this row into cols */
2695       lwrite = 0;
2696       for (l=0; l<nzB; l++) {
2697         if ((ctmp = bmap[cworkB[l]]) < cstart){
2698           vals[lwrite]   = aworkB[l];
2699           cols[lwrite++] = ctmp;
2700         }
2701       }
2702       for (l=0; l<nzA; l++){
2703         vals[lwrite]   = aworkA[l];
2704         cols[lwrite++] = cstart + cworkA[l];
2705       }
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) >= cend){
2708           vals[lwrite]   = aworkB[l];
2709           cols[lwrite++] = ctmp;
2710         }
2711       }
2712       vals += ncols;
2713       cols += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715       if (rownz_max < ncols) rownz_max = ncols;
2716     }
2717     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2718   } else { /* only copy matrix values into sbuf_a */
2719     rptr = sbuf_j;
2720     vals = sbuf_a;
2721     rptr[0] = 0;
2722     for (i=0; i<rend-rstart; i++){
2723       row = i + rstart;
2724       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2725       ncols  = nzA + nzB;
2726       cworkB = b->j + b->i[i];
2727       aworkA = a->a + a->i[i];
2728       aworkB = b->a + b->i[i];
2729       lwrite = 0;
2730       for (l=0; l<nzB; l++) {
2731         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2732       }
2733       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2734       for (l=0; l<nzB; l++) {
2735         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2736       }
2737       vals += ncols;
2738       rptr[i+1] = rptr[i] + ncols;
2739     }
2740   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */
2741 
2742   /* send nzlocal to others, and recv other's nzlocal */
2743   /*--------------------------------------------------*/
2744   if (reuse == MAT_INITIAL_MATRIX){
2745     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2746     s_waits2 = s_waits3 + nsends;
2747     s_waits1 = s_waits2 + nsends;
2748     r_waits1 = s_waits1 + nsends;
2749     r_waits2 = r_waits1 + nrecvs;
2750     r_waits3 = r_waits2 + nrecvs;
2751   } else {
2752     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2753     r_waits3 = s_waits3 + nsends;
2754   }
2755 
2756   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2757   if (reuse == MAT_INITIAL_MATRIX){
2758     /* get new tags to keep the communication clean */
2759     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2760     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2761     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2762 
2763     /* post receives of other's nzlocal */
2764     for (i=0; i<nrecvs; i++){
2765       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2766     }
2767     /* send nzlocal to others */
2768     for (i=0; i<nsends; i++){
2769       sbuf_nz[i] = nzlocal;
2770       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2771     }
2772     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2773     count = nrecvs;
2774     while (count) {
2775       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2776       recv_rank[imdex] = recv_status.MPI_SOURCE;
2777       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2778       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2779 
2780       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2781       rbuf_nz[imdex] += i + 2;
2782       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2783       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2784       count--;
2785     }
2786     /* wait on sends of nzlocal */
2787     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2788     /* send mat->i,j to others, and recv from others */
2789     /*------------------------------------------------*/
2790     for (i=0; i<nsends; i++){
2791       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2792       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2793     }
2794     /* wait on receives of mat->i,j */
2795     /*------------------------------*/
2796     count = nrecvs;
2797     while (count) {
2798       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2799       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2800       count--;
2801     }
2802     /* wait on sends of mat->i,j */
2803     /*---------------------------*/
2804     if (nsends) {
2805       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2806     }
2807   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2808 
2809   /* post receives, send and receive mat->a */
2810   /*----------------------------------------*/
2811   for (imdex=0; imdex<nrecvs; imdex++) {
2812     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2813   }
2814   for (i=0; i<nsends; i++){
2815     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2816   }
2817   count = nrecvs;
2818   while (count) {
2819     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2820     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2821     count--;
2822   }
2823   if (nsends) {
2824     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2825   }
2826 
2827   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2828 
2829   /* create redundant matrix */
2830   /*-------------------------*/
2831   if (reuse == MAT_INITIAL_MATRIX){
2832     /* compute rownz_max for preallocation */
2833     for (imdex=0; imdex<nrecvs; imdex++){
2834       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2835       rptr = rbuf_j[imdex];
2836       for (i=0; i<j; i++){
2837         ncols = rptr[i+1] - rptr[i];
2838         if (rownz_max < ncols) rownz_max = ncols;
2839       }
2840     }
2841 
2842     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2843     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2844     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2845     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2846     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2847     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2848   } else {
2849     C = *matredundant;
2850   }
2851 
2852   /* insert local matrix entries */
2853   rptr = sbuf_j;
2854   cols = sbuf_j + rend-rstart + 1;
2855   vals = sbuf_a;
2856   for (i=0; i<rend-rstart; i++){
2857     row   = i + rstart;
2858     ncols = rptr[i+1] - rptr[i];
2859     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2860     vals += ncols;
2861     cols += ncols;
2862   }
2863   /* insert received matrix entries */
2864   for (imdex=0; imdex<nrecvs; imdex++){
2865     rstart = rowrange[recv_rank[imdex]];
2866     rend   = rowrange[recv_rank[imdex]+1];
2867     rptr = rbuf_j[imdex];
2868     cols = rbuf_j[imdex] + rend-rstart + 1;
2869     vals = rbuf_a[imdex];
2870     for (i=0; i<rend-rstart; i++){
2871       row   = i + rstart;
2872       ncols = rptr[i+1] - rptr[i];
2873       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2874       vals += ncols;
2875       cols += ncols;
2876     }
2877   }
2878   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2881   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2882   if (reuse == MAT_INITIAL_MATRIX) {
2883     PetscContainer container;
2884     *matredundant = C;
2885     /* create a supporting struct and attach it to C for reuse */
2886     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2887     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2888     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2889     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2890     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2891     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2892 
2893     redund->nzlocal = nzlocal;
2894     redund->nsends  = nsends;
2895     redund->nrecvs  = nrecvs;
2896     redund->send_rank = send_rank;
2897     redund->recv_rank = recv_rank;
2898     redund->sbuf_nz = sbuf_nz;
2899     redund->rbuf_nz = rbuf_nz;
2900     redund->sbuf_j  = sbuf_j;
2901     redund->sbuf_a  = sbuf_a;
2902     redund->rbuf_j  = rbuf_j;
2903     redund->rbuf_a  = rbuf_a;
2904 
2905     redund->Destroy = C->ops->destroy;
2906     C->ops->destroy = MatDestroy_MatRedundant;
2907   }
2908   PetscFunctionReturn(0);
2909 }
2910 
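/*
   A minimal usage sketch for the routine above (not taken from this file; it assumes the public
   entry point MatGetRedundantMatrix() dispatches here with the same argument list). Here A is an
   assembled MPIAIJ matrix, subcomm is a caller-built subcommunicator (e.g. obtained from
   PetscSubcommCreate()), and mloc is the number of rows of the redundant copy owned on this
   process.
.vb
     Mat            A,Ared;
     MPI_Comm       subcomm;
     PetscInt       nsubcomm = 2,mloc;
     PetscErrorCode ierr;

     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mloc,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mloc,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);
.ve
   The MAT_REUSE_MATRIX call relies on the Mat_Redundant context composed on Ared by the code
   above, so the nonzero pattern of A must be unchanged between the two calls; only the numerical
   values may differ.
*/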
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2913 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2916   PetscErrorCode ierr;
2917   PetscInt       i,*idxb = 0;
2918   PetscScalar    *va,*vb;
2919   Vec            vtmp;
2920 
2921   PetscFunctionBegin;
2922   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2924   if (idx) {
2925     for (i=0; i<A->rmap->n; i++) {
2926       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2927     }
2928   }
2929 
2930   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2931   if (idx) {
2932     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2933   }
2934   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2935   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2936 
2937   for (i=0; i<A->rmap->n; i++){
2938     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2939       va[i] = vb[i];
2940       if (idx) idx[i] = a->garray[idxb[i]];
2941     }
2942   }
2943 
2944   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2946   ierr = PetscFree(idxb);CHKERRQ(ierr);
2947   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
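/*
   A minimal usage sketch for MatGetRowMaxAbs() (and, analogously, MatGetRowMinAbs()) on an
   assembled MPIAIJ matrix A (not taken from this file): v must be a vector with the same row
   layout as A, and the optional idx[] array receives the global column of the selected entry in
   each locally owned row.
.vb
     Vec            v;
     PetscInt       m,*idx;
     PetscErrorCode ierr;

     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
.ve
*/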
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2953 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2954 {
2955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2956   PetscErrorCode ierr;
2957   PetscInt       i,*idxb = 0;
2958   PetscScalar    *va,*vb;
2959   Vec            vtmp;
2960 
2961   PetscFunctionBegin;
2962   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2963   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2964   if (idx) {
2965     for (i=0; i<A->rmap->n; i++) {
2966       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2967     }
2968   }
2969 
2970   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2971   if (idx) {
2972     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2973   }
2974   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2975   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2976 
2977   for (i=0; i<A->rmap->n; i++){
2978     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2979       va[i] = vb[i];
2980       if (idx) idx[i] = a->garray[idxb[i]];
2981     }
2982   }
2983 
2984   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2985   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2986   ierr = PetscFree(idxb);CHKERRQ(ierr);
2987   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2993 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2994 {
2995   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2996   PetscInt       n      = A->rmap->n;
2997   PetscInt       cstart = A->cmap->rstart;
2998   PetscInt      *cmap   = mat->garray;
2999   PetscInt      *diagIdx, *offdiagIdx;
3000   Vec            diagV, offdiagV;
3001   PetscScalar   *a, *diagA, *offdiagA;
3002   PetscInt       r;
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3007   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3008   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3009   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3010   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3011   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3012   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3013   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3014   for (r = 0; r < n; ++r) {
3015     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3016       a[r]   = diagA[r];
3017       idx[r] = cstart + diagIdx[r];
3018     } else {
3019       a[r]   = offdiagA[r];
3020       idx[r] = cmap[offdiagIdx[r]];
3021     }
3022   }
3023   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3024   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3025   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3026   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3027   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3028   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3034 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3035 {
3036   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3037   PetscInt       n      = A->rmap->n;
3038   PetscInt       cstart = A->cmap->rstart;
3039   PetscInt      *cmap   = mat->garray;
3040   PetscInt      *diagIdx, *offdiagIdx;
3041   Vec            diagV, offdiagV;
3042   PetscScalar   *a, *diagA, *offdiagA;
3043   PetscInt       r;
3044   PetscErrorCode ierr;
3045 
3046   PetscFunctionBegin;
3047   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3048   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3049   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3050   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3051   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3052   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3053   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3054   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3055   for (r = 0; r < n; ++r) {
3056     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3057       a[r]   = diagA[r];
3058       idx[r] = cstart + diagIdx[r];
3059     } else {
3060       a[r]   = offdiagA[r];
3061       idx[r] = cmap[offdiagIdx[r]];
3062     }
3063   }
3064   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3065   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3066   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3067   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3068   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3069   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3075 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3076 {
3077   PetscErrorCode ierr;
3078   Mat            *dummy;
3079 
3080   PetscFunctionBegin;
3081   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3082   *newmat = *dummy;
3083   ierr = PetscFree(dummy);CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3091 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3092 {
3093   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3094   PetscErrorCode ierr;
3095 
3096   PetscFunctionBegin;
3097   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3098   PetscFunctionReturn(0);
3099 }
3100 
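/*
   A minimal usage sketch for MatInvertBlockDiagonal() (not taken from this file): for an MPIAIJ
   matrix the work is delegated to the local diagonal block a->A above; the returned array holds
   the inverses of the point-block diagonal, one block per local block row, and is owned by the
   matrix, so the caller must not free it.
.vb
     const PetscScalar *idiag;
     PetscErrorCode    ierr;

     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
.ve
*/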
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3103 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3104 {
3105   PetscErrorCode ierr;
3106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3107 
3108   PetscFunctionBegin;
3109   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3110   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3111   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
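/*
   A minimal usage sketch for filling an MPIAIJ matrix with random entries via the routine above
   (not taken from this file; it assumes the public MatSetRandom() dispatches here): typically the
   matrix is preallocated first, since the values are written into the existing diagonal and
   off-diagonal blocks.
.vb
     PetscRandom    rctx;
     PetscErrorCode ierr;

     ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
.ve
*/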
3116 /* -------------------------------------------------------------------*/
3117 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3118        MatGetRow_MPIAIJ,
3119        MatRestoreRow_MPIAIJ,
3120        MatMult_MPIAIJ,
3121 /* 4*/ MatMultAdd_MPIAIJ,
3122        MatMultTranspose_MPIAIJ,
3123        MatMultTransposeAdd_MPIAIJ,
3124 #ifdef PETSC_HAVE_PBGL
3125        MatSolve_MPIAIJ,
3126 #else
3127        0,
3128 #endif
3129        0,
3130        0,
3131 /*10*/ 0,
3132        0,
3133        0,
3134        MatSOR_MPIAIJ,
3135        MatTranspose_MPIAIJ,
3136 /*15*/ MatGetInfo_MPIAIJ,
3137        MatEqual_MPIAIJ,
3138        MatGetDiagonal_MPIAIJ,
3139        MatDiagonalScale_MPIAIJ,
3140        MatNorm_MPIAIJ,
3141 /*20*/ MatAssemblyBegin_MPIAIJ,
3142        MatAssemblyEnd_MPIAIJ,
3143        MatSetOption_MPIAIJ,
3144        MatZeroEntries_MPIAIJ,
3145 /*24*/ MatZeroRows_MPIAIJ,
3146        0,
3147 #ifdef PETSC_HAVE_PBGL
3148        0,
3149 #else
3150        0,
3151 #endif
3152        0,
3153        0,
3154 /*29*/ MatSetUp_MPIAIJ,
3155 #ifdef PETSC_HAVE_PBGL
3156        0,
3157 #else
3158        0,
3159 #endif
3160        0,
3161        0,
3162        0,
3163 /*34*/ MatDuplicate_MPIAIJ,
3164        0,
3165        0,
3166        0,
3167        0,
3168 /*39*/ MatAXPY_MPIAIJ,
3169        MatGetSubMatrices_MPIAIJ,
3170        MatIncreaseOverlap_MPIAIJ,
3171        MatGetValues_MPIAIJ,
3172        MatCopy_MPIAIJ,
3173 /*44*/ MatGetRowMax_MPIAIJ,
3174        MatScale_MPIAIJ,
3175        0,
3176        0,
3177        MatZeroRowsColumns_MPIAIJ,
3178 /*49*/ MatSetRandom_MPIAIJ,
3179        0,
3180        0,
3181        0,
3182        0,
3183 /*54*/ MatFDColoringCreate_MPIAIJ,
3184        0,
3185        MatSetUnfactored_MPIAIJ,
3186        MatPermute_MPIAIJ,
3187        0,
3188 /*59*/ MatGetSubMatrix_MPIAIJ,
3189        MatDestroy_MPIAIJ,
3190        MatView_MPIAIJ,
3191        0,
3192        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3193 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3194        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3195        0,
3196        0,
3197        0,
3198 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3199        MatGetRowMinAbs_MPIAIJ,
3200        0,
3201        MatSetColoring_MPIAIJ,
3202 #if defined(PETSC_HAVE_ADIC)
3203        MatSetValuesAdic_MPIAIJ,
3204 #else
3205        0,
3206 #endif
3207        MatSetValuesAdifor_MPIAIJ,
3208 /*75*/ MatFDColoringApply_AIJ,
3209        0,
3210        0,
3211        0,
3212        MatFindZeroDiagonals_MPIAIJ,
3213 /*80*/ 0,
3214        0,
3215        0,
3216 /*83*/ MatLoad_MPIAIJ,
3217        0,
3218        0,
3219        0,
3220        0,
3221        0,
3222 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3223        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3224        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3225        MatPtAP_MPIAIJ_MPIAIJ,
3226        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3227 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3228        0,
3229        0,
3230        0,
3231        0,
3232 /*99*/ 0,
3233        0,
3234        0,
3235        MatConjugate_MPIAIJ,
3236        0,
3237 /*104*/MatSetValuesRow_MPIAIJ,
3238        MatRealPart_MPIAIJ,
3239        MatImaginaryPart_MPIAIJ,
3240        0,
3241        0,
3242 /*109*/0,
3243        MatGetRedundantMatrix_MPIAIJ,
3244        MatGetRowMin_MPIAIJ,
3245        0,
3246        0,
3247 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3248        0,
3249        0,
3250        0,
3251        0,
3252 /*119*/0,
3253        0,
3254        0,
3255        0,
3256        MatGetMultiProcBlock_MPIAIJ,
3257 /*124*/MatFindNonzeroRows_MPIAIJ,
3258        MatGetColumnNorms_MPIAIJ,
3259        MatInvertBlockDiagonal_MPIAIJ,
3260        0,
3261        MatGetSubMatricesParallel_MPIAIJ,
3262 /*129*/0,
3263        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3264        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3265        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3266        0,
3267 /*134*/0,
3268        0,
3269        0,
3270        0,
3271        0
3272 };
3273 
3274 /* ----------------------------------------------------------------------------------------*/
3275 
3276 EXTERN_C_BEGIN
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3279 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3280 {
3281   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3286   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3287   PetscFunctionReturn(0);
3288 }
3289 EXTERN_C_END
3290 
3291 EXTERN_C_BEGIN
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3294 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3295 {
3296   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3297   PetscErrorCode ierr;
3298 
3299   PetscFunctionBegin;
3300   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3301   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 EXTERN_C_END
3305 
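/*
   A minimal usage sketch for MatStoreValues()/MatRetrieveValues() (not taken from this file):
   both simply forward to the diagonal and off-diagonal SeqAIJ blocks. The matrix must be
   assembled and its nonzero pattern frozen before the values can be stashed.
.vb
     PetscErrorCode ierr;

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ierr = MatZeroEntries(A);CHKERRQ(ierr);
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
.ve
   MatRetrieveValues() restores the numerical values stashed by MatStoreValues(); the typical use
   is reassembling a matrix with an unchanged nonzero structure on every nonlinear iteration.
*/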
3306 EXTERN_C_BEGIN
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3309 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3310 {
3311   Mat_MPIAIJ     *b;
3312   PetscErrorCode ierr;
3313   PetscInt       i;
3314   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3315 
3316   PetscFunctionBegin;
3317   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3318   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3319   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3320   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3321   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3322   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3323 
3324   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3325   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3326   if (d_nnz) {
3327     for (i=0; i<B->rmap->n; i++) {
3328       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3329     }
3330   }
3331   if (o_nnz) {
3332     for (i=0; i<B->rmap->n; i++) {
3333       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3334     }
3335   }
3336   b = (Mat_MPIAIJ*)B->data;
3337 
3338   if (!B->preallocated) {
3339     /* Explicitly create 2 MATSEQAIJ matrices. */
3340     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3341     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3342     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3343     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3344     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3345     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3346     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3347     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3348     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3349     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3350   }
3351 
3352   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3353   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3354   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3355   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3356   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3357   B->preallocated = PETSC_TRUE;
3358   PetscFunctionReturn(0);
3359 }
3360 EXTERN_C_END
3361 
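/*
   A minimal sketch of the calling sequence that reaches the preallocation routine above (not
   taken from this file): the type must be set before the preallocation call, and values are
   inserted afterwards. Here m and n are local row and column sizes chosen by the caller, and 5
   and 2 play the role of d_nz and o_nz described in the manual page of
   MatMPIAIJSetPreallocation() further below.
.vb
     Mat            A;
     PetscInt       m,n;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
*/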
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3364 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3365 {
3366   Mat            mat;
3367   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3368   PetscErrorCode ierr;
3369 
3370   PetscFunctionBegin;
3371   *newmat       = 0;
3372   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3373   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3374   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3375   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3376   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3377   a    = (Mat_MPIAIJ*)mat->data;
3378 
3379   mat->factortype    = matin->factortype;
3380   mat->rmap->bs      = matin->rmap->bs;
3381   mat->cmap->bs      = matin->cmap->bs;
3382   mat->assembled    = PETSC_TRUE;
3383   mat->insertmode   = NOT_SET_VALUES;
3384   mat->preallocated = PETSC_TRUE;
3385 
3386   a->size           = oldmat->size;
3387   a->rank           = oldmat->rank;
3388   a->donotstash     = oldmat->donotstash;
3389   a->roworiented    = oldmat->roworiented;
3390   a->rowindices     = 0;
3391   a->rowvalues      = 0;
3392   a->getrowactive   = PETSC_FALSE;
3393 
3394   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3395   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3396 
3397   if (oldmat->colmap) {
3398 #if defined (PETSC_USE_CTABLE)
3399     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3400 #else
3401     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3402     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3403     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3404 #endif
3405   } else a->colmap = 0;
3406   if (oldmat->garray) {
3407     PetscInt len;
3408     len  = oldmat->B->cmap->n;
3409     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3410     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3411     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3412   } else a->garray = 0;
3413 
3414   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3415   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3416   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3417   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3418   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3419   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3420   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3421   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3422   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3423   *newmat = mat;
3424   PetscFunctionReturn(0);
3425 }
3426 
3427 
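/*
   A minimal usage sketch for duplicating an MPIAIJ matrix with the routine above (not taken from
   this file): the duplicate gets copies of the layouts, the local A and B blocks, the column map
   and the scatter context, so it is immediately usable.
.vb
     Mat            B;
     PetscErrorCode ierr;

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
.ve
   MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN can be passed instead of MAT_COPY_VALUES
   when only the nonzero structure is needed.
*/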
3428 
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatLoad_MPIAIJ"
3431 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3432 {
3433   PetscScalar    *vals,*svals;
3434   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3435   PetscErrorCode ierr;
3436   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3437   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3438   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3439   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3440   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3441   int            fd;
3442   PetscInt       bs = 1;
3443 
3444   PetscFunctionBegin;
3445   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3446   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3447   if (!rank) {
3448     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3449     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3450     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3451   }
3452 
3453   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3454   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3455   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3456 
3457   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3458 
3459   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3460   M = header[1]; N = header[2];
3461   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3462   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3463   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3464 
3465   /* If global sizes are set, check if they are consistent with that given in the file */
3466   if (sizesset) {
3467     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3468   }
3469   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3470   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3471 
3472   /* determine ownership of all (block) rows */
3473   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3474   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3475   else m = newMat->rmap->n; /* Set by user */
3476 
3477   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3478   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3479 
3480   /* First process needs enough room for process with most rows */
3481   if (!rank) {
3482     mmax = rowners[1];
3483     for (i=2; i<=size; i++) {
3484       mmax = PetscMax(mmax, rowners[i]);
3485     }
3486   } else mmax = m;
3487 
3488   rowners[0] = 0;
3489   for (i=2; i<=size; i++) {
3490     rowners[i] += rowners[i-1];
3491   }
3492   rstart = rowners[rank];
3493   rend   = rowners[rank+1];
3494 
3495   /* distribute row lengths to all processors */
3496   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3497   if (!rank) {
3498     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3499     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3500     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3501     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3502     for (j=0; j<m; j++) {
3503       procsnz[0] += ourlens[j];
3504     }
3505     for (i=1; i<size; i++) {
3506       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3507       /* calculate the number of nonzeros on each processor */
3508       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3509         procsnz[i] += rowlengths[j];
3510       }
3511       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3512     }
3513     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3514   } else {
3515     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3516   }
3517 
3518   if (!rank) {
3519     /* determine max buffer needed and allocate it */
3520     maxnz = 0;
3521     for (i=0; i<size; i++) {
3522       maxnz = PetscMax(maxnz,procsnz[i]);
3523     }
3524     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3525 
3526     /* read in my part of the matrix column indices  */
3527     nz   = procsnz[0];
3528     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3529     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3530 
3531     /* read in everyone else's and ship off */
3532     for (i=1; i<size; i++) {
3533       nz     = procsnz[i];
3534       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3535       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3536     }
3537     ierr = PetscFree(cols);CHKERRQ(ierr);
3538   } else {
3539     /* determine buffer space needed for message */
3540     nz = 0;
3541     for (i=0; i<m; i++) {
3542       nz += ourlens[i];
3543     }
3544     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3545 
3546     /* receive message of column indices*/
3547     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3548   }
3549 
3550   /* determine column ownership if matrix is not square */
3551   if (N != M) {
3552     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3553     else n = newMat->cmap->n;
3554     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3555     cstart = cend - n;
3556   } else {
3557     cstart = rstart;
3558     cend   = rend;
3559     n      = cend - cstart;
3560   }
3561 
3562   /* loop over local rows, determining number of off diagonal entries */
3563   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3564   jj = 0;
3565   for (i=0; i<m; i++) {
3566     for (j=0; j<ourlens[i]; j++) {
3567       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3568       jj++;
3569     }
3570   }
3571 
3572   for (i=0; i<m; i++) {
3573     ourlens[i] -= offlens[i];
3574   }
3575   if (!sizesset) {
3576     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3577   }
3578 
3579   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3580 
3581   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3582 
3583   for (i=0; i<m; i++) {
3584     ourlens[i] += offlens[i];
3585   }
3586 
3587   if (!rank) {
3588     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3589 
3590     /* read in my part of the matrix numerical values  */
3591     nz   = procsnz[0];
3592     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3593 
3594     /* insert into matrix */
3595     jj      = rstart;
3596     smycols = mycols;
3597     svals   = vals;
3598     for (i=0; i<m; i++) {
3599       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3600       smycols += ourlens[i];
3601       svals   += ourlens[i];
3602       jj++;
3603     }
3604 
3605     /* read in other processors and ship out */
3606     for (i=1; i<size; i++) {
3607       nz     = procsnz[i];
3608       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3609       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3610     }
3611     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3612   } else {
3613     /* receive numeric values */
3614     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3615 
3616     /* receive message of values*/
3617     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3618 
3619     /* insert into matrix */
3620     jj      = rstart;
3621     smycols = mycols;
3622     svals   = vals;
3623     for (i=0; i<m; i++) {
3624       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3625       smycols += ourlens[i];
3626       svals   += ourlens[i];
3627       jj++;
3628     }
3629   }
3630   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3631   ierr = PetscFree(vals);CHKERRQ(ierr);
3632   ierr = PetscFree(mycols);CHKERRQ(ierr);
3633   ierr = PetscFree(rowners);CHKERRQ(ierr);
3634   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636 
3637   PetscFunctionReturn(0);
3638 }
3639 
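/*
   A minimal usage sketch for loading an MPIAIJ matrix with the routine above (not taken from this
   file; the file name matrix.dat is a placeholder): the type must be set before MatLoad() so that
   this implementation is dispatched, and the viewer must be a binary viewer positioned at a
   matrix object.
.vb
     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
   The option -matload_block_size <bs>, read in the options block at the top of the routine, sets
   the block size used while distributing the rows.
*/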
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3642 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3643 {
3644   PetscErrorCode ierr;
3645   IS             iscol_local;
3646   PetscInt       csize;
3647 
3648   PetscFunctionBegin;
3649   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3650   if (call == MAT_REUSE_MATRIX) {
3651     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3652     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3653   } else {
3654     PetscInt cbs;
3655     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3656     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3657     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3658   }
3659   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3660   if (call == MAT_INITIAL_MATRIX) {
3661     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3662     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3663   }
3664   PetscFunctionReturn(0);
3665 }
3666 
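/*
   A minimal usage sketch for MatGetSubMatrix() on an MPIAIJ matrix (not taken from this file;
   first_col and ncols_local are placeholders for the caller's column selection): isrow holds the
   locally owned rows to extract and iscol the columns; the routine above gathers iscol onto every
   process and caches it on the submatrix for the MAT_REUSE_MATRIX path.
.vb
     IS             isrow,iscol;
     Mat            S;
     PetscInt       rstart,rend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&S);CHKERRQ(ierr);
.ve
   The second call reuses S after the numerical values of A have changed while the index sets
   (and hence the submatrix layout) are unchanged.
*/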
3667 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3668 #undef __FUNCT__
3669 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3670 /*
3671     Not great since it makes two copies of the submatrix: first a SeqAIJ
3672   on each process, and then the end result obtained by concatenating the local matrices.
3673   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3674 
3675   Note: This requires a sequential iscol with all indices.
3676 */
3677 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3678 {
3679   PetscErrorCode ierr;
3680   PetscMPIInt    rank,size;
3681   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3682   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3683   PetscBool      allcolumns, colflag;
3684   Mat            M,Mreuse;
3685   MatScalar      *vwork,*aa;
3686   MPI_Comm       comm = ((PetscObject)mat)->comm;
3687   Mat_SeqAIJ     *aij;
3688 
3689 
3690   PetscFunctionBegin;
3691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3693 
3694   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3695   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3696   if (colflag && ncol == mat->cmap->N){
3697     allcolumns = PETSC_TRUE;
3698   } else {
3699     allcolumns = PETSC_FALSE;
3700   }
3701   if (call ==  MAT_REUSE_MATRIX) {
3702     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3703     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3704     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3705   } else {
3706     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3707   }
3708 
3709   /*
3710       m - number of local rows
3711       n - number of columns (same on all processors)
3712       rstart - first row in new global matrix generated
3713   */
3714   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3715   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3716   if (call == MAT_INITIAL_MATRIX) {
3717     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3718     ii  = aij->i;
3719     jj  = aij->j;
3720 
3721     /*
3722         Determine the number of non-zeros in the diagonal and off-diagonal
3723         portions of the matrix in order to do correct preallocation
3724     */
3725 
3726     /* first get start and end of "diagonal" columns */
3727     if (csize == PETSC_DECIDE) {
3728       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3729       if (mglobal == n) { /* square matrix */
3730 	nlocal = m;
3731       } else {
3732         nlocal = n/size + ((n % size) > rank);
3733       }
3734     } else {
3735       nlocal = csize;
3736     }
3737     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3738     rstart = rend - nlocal;
3739     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3740 
3741     /* next, compute all the lengths */
3742     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3743     olens = dlens + m;
3744     for (i=0; i<m; i++) {
3745       jend = ii[i+1] - ii[i];
3746       olen = 0;
3747       dlen = 0;
3748       for (j=0; j<jend; j++) {
3749         if (*jj < rstart || *jj >= rend) olen++;
3750         else dlen++;
3751         jj++;
3752       }
3753       olens[i] = olen;
3754       dlens[i] = dlen;
3755     }
3756     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3757     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3758     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3759     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3760     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3761     ierr = PetscFree(dlens);CHKERRQ(ierr);
3762   } else {
3763     PetscInt ml,nl;
3764 
3765     M = *newmat;
3766     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3767     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3768     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3769     /*
3770          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3771        rather than the slower MatSetValues().
3772     */
3773     M->was_assembled = PETSC_TRUE;
3774     M->assembled     = PETSC_FALSE;
3775   }
3776   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3777   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3778   ii  = aij->i;
3779   jj  = aij->j;
3780   aa  = aij->a;
3781   for (i=0; i<m; i++) {
3782     row   = rstart + i;
3783     nz    = ii[i+1] - ii[i];
3784     cwork = jj;     jj += nz;
3785     vwork = aa;     aa += nz;
3786     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3787   }
3788 
3789   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3791   *newmat = M;
3792 
3793   /* save submatrix used in processor for next request */
3794   if (call ==  MAT_INITIAL_MATRIX) {
3795     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3796     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3797   }
3798 
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 EXTERN_C_BEGIN
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3805 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3806 {
3807   PetscInt       m,cstart, cend,j,nnz,i,d;
3808   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3809   const PetscInt *JJ;
3810   PetscScalar    *values;
3811   PetscErrorCode ierr;
3812 
3813   PetscFunctionBegin;
3814   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3815 
3816   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3817   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3818   m      = B->rmap->n;
3819   cstart = B->cmap->rstart;
3820   cend   = B->cmap->rend;
3821   rstart = B->rmap->rstart;
3822 
3823   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3824 
3825 #if defined(PETSC_USE_DEBUG)
3826   for (i=0; i<m; i++) {
3827     nnz     = Ii[i+1]- Ii[i];
3828     JJ      = J + Ii[i];
3829     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3830     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3831     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3832   }
3833 #endif
3834 
3835   for (i=0; i<m; i++) {
3836     nnz     = Ii[i+1]- Ii[i];
3837     JJ      = J + Ii[i];
3838     nnz_max = PetscMax(nnz_max,nnz);
3839     d       = 0;
3840     for (j=0; j<nnz; j++) {
3841       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3842     }
3843     d_nnz[i] = d;
3844     o_nnz[i] = nnz - d;
3845   }
3846   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3847   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3848 
3849   if (v) values = (PetscScalar*)v;
3850   else {
3851     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3852     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3853   }
3854 
3855   for (i=0; i<m; i++) {
3856     ii   = i + rstart;
3857     nnz  = Ii[i+1]- Ii[i];
3858     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3859   }
3860   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3861   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862 
3863   if (!v) {
3864     ierr = PetscFree(values);CHKERRQ(ierr);
3865   }
3866   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3867   PetscFunctionReturn(0);
3868 }
3869 EXTERN_C_END
3870 
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3873 /*@
3874    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3875    (the default parallel PETSc format).
3876 
3877    Collective on MPI_Comm
3878 
3879    Input Parameters:
3880 +  B - the matrix
3881 .  i - the indices into j for the start of each local row (starts with zero)
3882 .  j - the column indices for each local row (starts with zero)
3883 -  v - optional values in the matrix
3884 
3885    Level: developer
3886 
3887    Notes:
3888        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3889      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3890      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3891 
3892        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3893 
3894        The format used for the sparse matrix input is equivalent to a
3895     row-major ordering, i.e. for the following matrix, the expected input data is
3896     as shown:
3897 
3898         1 0 0
3899         2 0 3     P0
3900        -------
3901         4 5 6     P1
3902 
3903      Process0 [P0]: rows_owned=[0,1]
3904         i =  {0,1,3}  [size = nrow+1  = 2+1]
3905         j =  {0,0,2}  [size = nz = 3]
3906         v =  {1,2,3}  [size = nz = 3]
3907 
3908      Process1 [P1]: rows_owned=[2]
3909         i =  {0,3}    [size = nrow+1  = 1+1]
3910         j =  {0,1,2}  [size = nz = 3]
3911         v =  {4,5,6}  [size = nz = 3]
3912 
3913 .keywords: matrix, aij, compressed row, sparse, parallel
3914 
3915 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3916           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3917 @*/
3918 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3919 {
3920   PetscErrorCode ierr;
3921 
3922   PetscFunctionBegin;
3923   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3924   PetscFunctionReturn(0);
3925 }
3926 
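/*
   A minimal sketch of calling MatMPIAIJSetPreallocationCSR() with the two-process example from
   the manual page above (not taken from this file; it assumes exactly two MPI processes and that
   rank holds the result of MPI_Comm_rank() on PETSC_COMM_WORLD): process 0 owns rows 0-1 and
   process 1 owns row 2, and the arrays are the local CSR arrays shown there.
.vb
     Mat         A;
     PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},j1[] = {0,1,2};
     PetscScalar v1[] = {4,5,6};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(A,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(A,i1,j1,v1);CHKERRQ(ierr);}
.ve
   The routine both preallocates and inserts the values, and ends with its own assembly calls, so
   the matrix is ready to use afterwards.
*/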
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3929 /*@C
3930    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3931    (the default parallel PETSc format).  For good matrix assembly performance
3932    the user should preallocate the matrix storage by setting the parameters
3933    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3934    performance can be increased by more than a factor of 50.
3935 
3936    Collective on MPI_Comm
3937 
3938    Input Parameters:
3939 +  A - the matrix
3940 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3941            (same value is used for all local rows)
3942 .  d_nnz - array containing the number of nonzeros in the various rows of the
3943            DIAGONAL portion of the local submatrix (possibly different for each row)
3944            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3945            The size of this array is equal to the number of local rows, i.e 'm'.
3946            For matrices that will be factored, you must leave room for (and set)
3947            the diagonal entry even if it is zero.
3948 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3949            submatrix (same value is used for all local rows).
3950 -  o_nnz - array containing the number of nonzeros in the various rows of the
3951            OFF-DIAGONAL portion of the local submatrix (possibly different for
3952            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3953            structure. The size of this array is equal to the number
3954            of local rows, i.e 'm'.
3955 
3956    If the *_nnz parameter is given then the *_nz parameter is ignored
3957 
3958    The AIJ format (also called the Yale sparse matrix format or
3959    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3960    storage.  The stored row and column indices begin with zero.
3961    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3962 
3963    The parallel matrix is partitioned such that the first m0 rows belong to
3964    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3965    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3966 
3967    The DIAGONAL portion of the local submatrix of a processor can be defined
3968    as the submatrix which is obtained by extracting the part corresponding to
3969    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3970    first row that belongs to the processor, r2 is the last row belonging to
3971    this processor, and c1-c2 is the range of indices of the local part of a
3972    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3973    common case of a square matrix, the row and column ranges are the same and
3974    the DIAGONAL part is also square. The remaining portion of the local
3975    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3976 
3977    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3978    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3979    You can call MatGetInfo() to get information on how effective the preallocation was;
3980    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3981    You can also run with the option -info and look for messages with the string
3982    malloc in them to see if additional memory allocation was needed.
3983 
3984    Example usage:
3985 
3986    Consider the following 8x8 matrix with 34 non-zero values, which is
3987    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3988    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3989    as follows:
3990 
3991 .vb
3992             1  2  0  |  0  3  0  |  0  4
3993     Proc0   0  5  6  |  7  0  0  |  8  0
3994             9  0 10  | 11  0  0  | 12  0
3995     -------------------------------------
3996            13  0 14  | 15 16 17  |  0  0
3997     Proc1   0 18  0  | 19 20 21  |  0  0
3998             0  0  0  | 22 23  0  | 24  0
3999     -------------------------------------
4000     Proc2  25 26 27  |  0  0 28  | 29  0
4001            30  0  0  | 31 32 33  |  0 34
4002 .ve
4003 
4004    This can be represented as a collection of submatrices as:
4005 
4006 .vb
4007       A B C
4008       D E F
4009       G H I
4010 .ve
4011 
4012    Where the submatrices A,B,C are owned by proc0, D,E,F are
4013    owned by proc1, G,H,I are owned by proc2.
4014 
4015    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4016    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4017    The 'M','N' parameters are 8,8, and have the same values on all procs.
4018 
4019    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4020    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4021    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4022    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4023    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4024    matrix, and [DF] as another SeqAIJ matrix.
4025 
4026    When d_nz, o_nz parameters are specified, d_nz storage elements are
4027    allocated for every row of the local diagonal submatrix, and o_nz
4028    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4029    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4030    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4031    In this case, the values of d_nz,o_nz are:
4032 .vb
4033      proc0 : d_nz = 2, o_nz = 2
4034      proc1 : d_nz = 3, o_nz = 2
4035      proc2 : d_nz = 1, o_nz = 4
4036 .ve
4037    We are allocating m*(d_nz+o_nz) storage locations for every processor. This
4038    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4039    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4040    34 values.
4041 
4042    When the d_nnz, o_nnz parameters are specified, the storage is specified
4043    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4044    In the above case the values for d_nnz,o_nnz are:
4045 .vb
4046      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4047      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4048      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4049 .ve
4050    Here the space allocated is the sum of all the above values, i.e. 34, and
4051    hence the preallocation is perfect.
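
        As an illustration only, a minimal sketch (hypothetical names) of what proc0 above might call,
        assuming A was already created with 3 local rows and type MATMPIAIJ; since d_nnz and o_nnz are
        given, the d_nz and o_nz arguments passed here are ignored:

     .vb
          PetscInt d_nnz[] = {2,2,2};
          PetscInt o_nnz[] = {2,2,2};
          ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve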
4052 
4053    Level: intermediate
4054 
4055 .keywords: matrix, aij, compressed row, sparse, parallel
4056 
4057 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4058           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4059 @*/
4060 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4061 {
4062   PetscErrorCode ierr;
4063 
4064   PetscFunctionBegin;
4065   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4066   PetscValidType(B,1);
4067   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4068   PetscFunctionReturn(0);
4069 }
4070 
4071 #undef __FUNCT__
4072 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4073 /*@
4074      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4075          CSR format.
4076 
4077    Collective on MPI_Comm
4078 
4079    Input Parameters:
4080 +  comm - MPI communicator
4081 .  m - number of local rows (Cannot be PETSC_DECIDE)
4082 .  n - This value should be the same as the local size used in creating the
4083        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4084        calculated if N is given) For square matrices n is almost always m.
4085 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4086 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4087 .   i - row indices
4088 .   j - column indices
4089 -   a - matrix values
4090 
4091    Output Parameter:
4092 .   mat - the matrix
4093 
4094    Level: intermediate
4095 
4096    Notes:
4097        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4098      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4099      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4100 
4101        The i and j indices are 0 based; the i array holds the row offsets into the local j array (standard CSR row pointers).
4102 
4103        The format which is used for the sparse matrix input is equivalent to a
4104     row-major ordering, i.e. for the following matrix, the expected input data is
4105     as shown:
4106 
4107         1 0 0
4108         2 0 3     P0
4109        -------
4110         4 5 6     P1
4111 
4112      Process0 [P0]: rows_owned=[0,1]
4113         i =  {0,1,3}  [size = nrow+1  = 2+1]
4114         j =  {0,0,2}  [size = nz = 3]
4115         v =  {1,2,3}  [size = nz = 3]
4116 
4117      Process1 [P1]: rows_owned=[2]
4118         i =  {0,3}    [size = nrow+1  = 1+1]
4119         j =  {0,1,2}  [size = nz = 3]
4120         v =  {4,5,6}  [size = nz = 3]
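
        As an illustration only, a minimal sketch (hypothetical variable names) of the call process 0
        above might make; process 1 makes the same collective call with its own local arrays:

     .vb
          PetscInt    i[] = {0,1,3};
          PetscInt    j[] = {0,0,2};
          PetscScalar v[] = {1,2,3};
          Mat         A;
          ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     .ve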
4121 
4122 .keywords: matrix, aij, compressed row, sparse, parallel
4123 
4124 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4125           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4126 @*/
4127 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4128 {
4129   PetscErrorCode ierr;
4130 
4131   PetscFunctionBegin;
4132   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4133   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4134   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4135   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4136   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4137   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4138   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4139   PetscFunctionReturn(0);
4140 }
4141 
4142 #undef __FUNCT__
4143 #define __FUNCT__ "MatCreateAIJ"
4144 /*@C
4145    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4146    (the default parallel PETSc format).  For good matrix assembly performance
4147    the user should preallocate the matrix storage by setting the parameters
4148    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4149    performance can be increased by more than a factor of 50.
4150 
4151    Collective on MPI_Comm
4152 
4153    Input Parameters:
4154 +  comm - MPI communicator
4155 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4156            This value should be the same as the local size used in creating the
4157            y vector for the matrix-vector product y = Ax.
4158 .  n - This value should be the same as the local size used in creating the
4159        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4160        calculated if N is given) For square matrices n is almost always m.
4161 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4162 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4163 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4164            (same value is used for all local rows)
4165 .  d_nnz - array containing the number of nonzeros in the various rows of the
4166            DIAGONAL portion of the local submatrix (possibly different for each row)
4167            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4168            The size of this array is equal to the number of local rows, i.e. 'm'.
4169 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4170            submatrix (same value is used for all local rows).
4171 -  o_nnz - array containing the number of nonzeros in the various rows of the
4172            OFF-DIAGONAL portion of the local submatrix (possibly different for
4173            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4174            structure. The size of this array is equal to the number
4175            of local rows, i.e. 'm'.
4176 
4177    Output Parameter:
4178 .  A - the matrix
4179 
4180    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4181    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4182    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
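
        A minimal sketch of that paradigm (hypothetical names and example per-row counts; M and N are
        assumed known). Calling both preallocation routines is safe because the one that does not match
        the actual matrix type is a no-op:

     .vb
          ierr = MatCreate(comm,&A);CHKERRQ(ierr);
          ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
          ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
          ierr = MatSetFromOptions(A);CHKERRQ(ierr);
          ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
          ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .ve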
4183 
4184    Notes:
4185    If the *_nnz parameter is given then the *_nz parameter is ignored
4186 
4187    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4188    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4189    storage requirements for this matrix.
4190 
4191    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4192    processor then it must be used on all processors that share the object for
4193    that argument.
4194 
4195    The user MUST specify either the local or global matrix dimensions
4196    (possibly both).
4197 
4198    The parallel matrix is partitioned across processors such that the
4199    first m0 rows belong to process 0, the next m1 rows belong to
4200    process 1, the next m2 rows belong to process 2, etc., where
4201    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4202    values corresponding to an [m x N] submatrix.
4203 
4204    The columns are logically partitioned with the n0 columns belonging
4205    to the 0th partition, the next n1 columns belonging to the next
4206    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4207 
4208    The DIAGONAL portion of the local submatrix on any given processor
4209    is the submatrix corresponding to the m rows and n columns owned by
4210    that processor, i.e. the diagonal matrix on
4211    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4212    etc. The remaining portion of the local submatrix [m x (N-n)]
4213    constitutes the OFF-DIAGONAL portion. The example below better
4214    illustrates this concept.
4215 
4216    For a square global matrix we define each processor's diagonal portion
4217    to be its local rows and the corresponding columns (a square submatrix);
4218    each processor's off-diagonal portion encompasses the remainder of the
4219    local matrix (a rectangular submatrix).
4220 
4221    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4222    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4223    When calling this routine with a single process communicator, a matrix of
4224    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4225    type of communicator, use the construction mechanism:
4226      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4227 
4228    By default, this format uses inodes (identical nodes) when possible.
4229    We search for consecutive rows with the same nonzero structure, thereby
4230    reusing matrix information to achieve increased efficiency.
4231 
4232    Options Database Keys:
4233 +  -mat_no_inode  - Do not use inodes
4234 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4235 -  -mat_aij_oneindex - Internally use indexing starting at 1
4236         rather than 0.  Note that when calling MatSetValues(),
4237         the user still MUST index entries starting at 0!
4238 
4239 
4240    Example usage:
4241 
4242    Consider the following 8x8 matrix with 34 non-zero values, which is
4243    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4244    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4245    as follows:
4246 
4247 .vb
4248             1  2  0  |  0  3  0  |  0  4
4249     Proc0   0  5  6  |  7  0  0  |  8  0
4250             9  0 10  | 11  0  0  | 12  0
4251     -------------------------------------
4252            13  0 14  | 15 16 17  |  0  0
4253     Proc1   0 18  0  | 19 20 21  |  0  0
4254             0  0  0  | 22 23  0  | 24  0
4255     -------------------------------------
4256     Proc2  25 26 27  |  0  0 28  | 29  0
4257            30  0  0  | 31 32 33  |  0 34
4258 .ve
4259 
4260    This can be represented as a collection of submatrices as:
4261 
4262 .vb
4263       A B C
4264       D E F
4265       G H I
4266 .ve
4267 
4268    Where the submatrices A,B,C are owned by proc0, D,E,F are
4269    owned by proc1, G,H,I are owned by proc2.
4270 
4271    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4272    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4273    The 'M','N' parameters are 8,8, and have the same values on all procs.
4274 
4275    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4276    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4277    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4278    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4279    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4280    matrix, and [DF] as another SeqAIJ matrix.
4281 
4282    When d_nz, o_nz parameters are specified, d_nz storage elements are
4283    allocated for every row of the local diagonal submatrix, and o_nz
4284    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4285    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4286    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4287    In this case, the values of d_nz,o_nz are:
4288 .vb
4289      proc0 : d_nz = 2, o_nz = 2
4290      proc1 : d_nz = 3, o_nz = 2
4291      proc2 : d_nz = 1, o_nz = 4
4292 .ve
4293    We are allocating m*(d_nz+o_nz) storage locations for every processor. This
4294    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4295    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4296    34 values.
4297 
4298    When the d_nnz, o_nnz parameters are specified, the storage is specified
4299    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4300    In the above case the values for d_nnz,o_nnz are:
4301 .vb
4302      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4303      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4304      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4305 .ve
4306    Here the space allocated is the sum of all the above values, i.e. 34, and
4307    hence the preallocation is perfect.
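
        As an illustration only, a minimal sketch (hypothetical names) of the call proc0 above might
        make with its per-row counts; the other processes make the same collective call with their own
        local sizes and counts:

     .vb
          PetscInt d_nnz[] = {2,2,2};
          PetscInt o_nnz[] = {2,2,2};
          Mat      A;
          ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     .ve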
4308 
4309    Level: intermediate
4310 
4311 .keywords: matrix, aij, compressed row, sparse, parallel
4312 
4313 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4314           MPIAIJ, MatCreateMPIAIJWithArrays()
4315 @*/
4316 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4317 {
4318   PetscErrorCode ierr;
4319   PetscMPIInt    size;
4320 
4321   PetscFunctionBegin;
4322   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4323   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4324   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4325   if (size > 1) {
4326     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4327     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4328   } else {
4329     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4330     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4331   }
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 #undef __FUNCT__
4336 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4337 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4338 {
4339   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4340 
4341   PetscFunctionBegin;
4342   *Ad     = a->A;
4343   *Ao     = a->B;
4344   *colmap = a->garray;
4345   PetscFunctionReturn(0);
4346 }
4347 
4348 #undef __FUNCT__
4349 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4350 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4351 {
4352   PetscErrorCode ierr;
4353   PetscInt       i;
4354   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4355 
4356   PetscFunctionBegin;
4357   if (coloring->ctype == IS_COLORING_GLOBAL) {
4358     ISColoringValue *allcolors,*colors;
4359     ISColoring      ocoloring;
4360 
4361     /* set coloring for diagonal portion */
4362     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4363 
4364     /* set coloring for off-diagonal portion */
4365     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4366     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4367     for (i=0; i<a->B->cmap->n; i++) {
4368       colors[i] = allcolors[a->garray[i]];
4369     }
4370     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4371     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4372     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4373     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4374   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4375     ISColoringValue *colors;
4376     PetscInt        *larray;
4377     ISColoring      ocoloring;
4378 
4379     /* set coloring for diagonal portion */
4380     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4381     for (i=0; i<a->A->cmap->n; i++) {
4382       larray[i] = i + A->cmap->rstart;
4383     }
4384     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4385     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4386     for (i=0; i<a->A->cmap->n; i++) {
4387       colors[i] = coloring->colors[larray[i]];
4388     }
4389     ierr = PetscFree(larray);CHKERRQ(ierr);
4390     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4391     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4392     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4393 
4394     /* set coloring for off-diagonal portion */
4395     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4396     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4397     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4398     for (i=0; i<a->B->cmap->n; i++) {
4399       colors[i] = coloring->colors[larray[i]];
4400     }
4401     ierr = PetscFree(larray);CHKERRQ(ierr);
4402     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4403     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4404     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4405   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4406 
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #if defined(PETSC_HAVE_ADIC)
4411 #undef __FUNCT__
4412 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4413 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4414 {
4415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4416   PetscErrorCode ierr;
4417 
4418   PetscFunctionBegin;
4419   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4420   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
4423 #endif
4424 
4425 #undef __FUNCT__
4426 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4427 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4428 {
4429   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4430   PetscErrorCode ierr;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4434   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4440 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4441 {
4442   PetscErrorCode ierr;
4443   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4444   PetscInt       *indx;
4445 
4446   PetscFunctionBegin;
4447   /* This routine will ONLY return MPIAIJ type matrix */
4448   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4449   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4450   if (n == PETSC_DECIDE){
4451     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4452   }
4453   /* Check sum(n) = N */
4454   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4455   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4456 
4457   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4458   rstart -= m;
4459 
4460   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4461   for (i=0;i<m;i++) {
4462     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4463     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4464     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4465   }
4466 
4467   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4468   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4469   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4470   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4471   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4472   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 #undef __FUNCT__
4477 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4478 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4479 {
4480   PetscErrorCode ierr;
4481   PetscInt       m,N,i,rstart,nnz,Ii;
4482   PetscInt       *indx;
4483   PetscScalar    *values;
4484 
4485   PetscFunctionBegin;
4486   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4487   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4488   for (i=0;i<m;i++) {
4489     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4490     Ii    = i + rstart;
4491     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4492     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4493   }
4494   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4495   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 #undef __FUNCT__
4500 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4501 /*@
4502       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4503                  matrices from each processor
4504 
4505     Collective on MPI_Comm
4506 
4507    Input Parameters:
4508 +    comm - the communicator the parallel matrix will live on
4509 .    inmat - the input sequential matrix (one per process)
4510 .    n - number of local columns (or PETSC_DECIDE)
4511 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4512 
4513    Output Parameter:
4514 .    outmat - the parallel matrix generated
4515 
4516     Level: advanced
4517 
4518    Notes: The number of columns of the matrix in EACH processor MUST be the same.
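
        A minimal usage sketch (hypothetical names), assuming every process already holds a SeqAIJ
        matrix Alocal with the same number of columns:

     .vb
          Mat C;
          ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,Alocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     .ve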
4519 
4520 @*/
4521 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4522 {
4523   PetscErrorCode ierr;
4524 
4525   PetscFunctionBegin;
4526   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4527   if (scall == MAT_INITIAL_MATRIX){
4528     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4529   }
4530   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4531   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4532   PetscFunctionReturn(0);
4533 }
4534 
4535 #undef __FUNCT__
4536 #define __FUNCT__ "MatFileSplit"
4537 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4538 {
4539   PetscErrorCode    ierr;
4540   PetscMPIInt       rank;
4541   PetscInt          m,N,i,rstart,nnz;
4542   size_t            len;
4543   const PetscInt    *indx;
4544   PetscViewer       out;
4545   char              *name;
4546   Mat               B;
4547   const PetscScalar *values;
4548 
4549   PetscFunctionBegin;
4550   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4551   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4552   /* Should this be the type of the diagonal block of A? */
4553   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4554   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4555   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4556   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4557   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4558   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4559   for (i=0;i<m;i++) {
4560     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4561     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4562     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4563   }
4564   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4566 
4567   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4568   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4569   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4570   sprintf(name,"%s.%d",outfile,rank);
4571   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4572   ierr = PetscFree(name);CHKERRQ(ierr);
4573   ierr = MatView(B,out);CHKERRQ(ierr);
4574   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4575   ierr = MatDestroy(&B);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4580 #undef __FUNCT__
4581 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4582 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4583 {
4584   PetscErrorCode       ierr;
4585   Mat_Merge_SeqsToMPI  *merge;
4586   PetscContainer       container;
4587 
4588   PetscFunctionBegin;
4589   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4590   if (container) {
4591     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4592     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4593     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4594     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4595     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4596     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4597     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4598     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4599     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4600     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4601     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4602     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4603     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4604     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4605     ierr = PetscFree(merge);CHKERRQ(ierr);
4606     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4607   }
4608   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 #include <../src/mat/utils/freespace.h>
4613 #include <petscbt.h>
4614 
4615 #undef __FUNCT__
4616 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4617 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4618 {
4619   PetscErrorCode       ierr;
4620   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4621   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4622   PetscMPIInt          size,rank,taga,*len_s;
4623   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4624   PetscInt             proc,m;
4625   PetscInt             **buf_ri,**buf_rj;
4626   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4627   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4628   MPI_Request          *s_waits,*r_waits;
4629   MPI_Status           *status;
4630   MatScalar            *aa=a->a;
4631   MatScalar            **abuf_r,*ba_i;
4632   Mat_Merge_SeqsToMPI  *merge;
4633   PetscContainer       container;
4634 
4635   PetscFunctionBegin;
4636   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4637 
4638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4639   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4640 
4641   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4642   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4643 
4644   bi     = merge->bi;
4645   bj     = merge->bj;
4646   buf_ri = merge->buf_ri;
4647   buf_rj = merge->buf_rj;
4648 
4649   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4650   owners = merge->rowmap->range;
4651   len_s  = merge->len_s;
4652 
4653   /* send and recv matrix values */
4654   /*-----------------------------*/
4655   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4656   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4657 
4658   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4659   for (proc=0,k=0; proc<size; proc++){
4660     if (!len_s[proc]) continue;
4661     i = owners[proc];
4662     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4663     k++;
4664   }
4665 
4666   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4667   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4668   ierr = PetscFree(status);CHKERRQ(ierr);
4669 
4670   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4671   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4672 
4673   /* insert mat values of mpimat */
4674   /*----------------------------*/
4675   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4676   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4677 
4678   for (k=0; k<merge->nrecv; k++){
4679     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4680     nrows = *(buf_ri_k[k]);
4681     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4682     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4683   }
4684 
4685   /* set values of ba */
4686   m = merge->rowmap->n;
4687   for (i=0; i<m; i++) {
4688     arow = owners[rank] + i;
4689     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4690     bnzi = bi[i+1] - bi[i];
4691     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4692 
4693     /* add local non-zero vals of this proc's seqmat into ba */
4694     anzi = ai[arow+1] - ai[arow];
4695     aj   = a->j + ai[arow];
4696     aa   = a->a + ai[arow];
4697     nextaj = 0;
4698     for (j=0; nextaj<anzi; j++){
4699       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4700         ba_i[j] += aa[nextaj++];
4701       }
4702     }
4703 
4704     /* add received vals into ba */
4705     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4706       /* i-th row */
4707       if (i == *nextrow[k]) {
4708         anzi = *(nextai[k]+1) - *nextai[k];
4709         aj   = buf_rj[k] + *(nextai[k]);
4710         aa   = abuf_r[k] + *(nextai[k]);
4711         nextaj = 0;
4712         for (j=0; nextaj<anzi; j++){
4713           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4714             ba_i[j] += aa[nextaj++];
4715           }
4716         }
4717         nextrow[k]++; nextai[k]++;
4718       }
4719     }
4720     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4721   }
4722   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4723   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4724 
4725   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4726   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4727   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4728   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4729   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4730   PetscFunctionReturn(0);
4731 }
4732 
4733 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4734 
4735 #undef __FUNCT__
4736 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4737 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4738 {
4739   PetscErrorCode       ierr;
4740   Mat                  B_mpi;
4741   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4742   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4743   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4744   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4745   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4746   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4747   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4748   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4749   MPI_Status           *status;
4750   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4751   PetscBT              lnkbt;
4752   Mat_Merge_SeqsToMPI  *merge;
4753   PetscContainer       container;
4754 
4755   PetscFunctionBegin;
4756   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4757 
4758   /* make sure it is a PETSc comm */
4759   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4760   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4761   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4762 
4763   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4764   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4765 
4766   /* determine row ownership */
4767   /*---------------------------------------------------------*/
4768   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4769   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4770   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4771   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4772   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4773   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4774   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4775 
4776   m      = merge->rowmap->n;
4777   owners = merge->rowmap->range;
4778 
4779   /* determine the number of messages to send, their lengths */
4780   /*---------------------------------------------------------*/
4781   len_s  = merge->len_s;
4782 
4783   len = 0;  /* length of buf_si[] */
4784   merge->nsend = 0;
4785   for (proc=0; proc<size; proc++){
4786     len_si[proc] = 0;
4787     if (proc == rank){
4788       len_s[proc] = 0;
4789     } else {
4790       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4791       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4792     }
4793     if (len_s[proc]) {
4794       merge->nsend++;
4795       nrows = 0;
4796       for (i=owners[proc]; i<owners[proc+1]; i++){
4797         if (ai[i+1] > ai[i]) nrows++;
4798       }
4799       len_si[proc] = 2*(nrows+1);
4800       len += len_si[proc];
4801     }
4802   }
4803 
4804   /* determine the number and length of messages to receive for ij-structure */
4805   /*-------------------------------------------------------------------------*/
4806   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4807   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4808 
4809   /* post the Irecv of j-structure */
4810   /*-------------------------------*/
4811   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4812   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4813 
4814   /* post the Isend of j-structure */
4815   /*--------------------------------*/
4816   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4817 
4818   for (proc=0, k=0; proc<size; proc++){
4819     if (!len_s[proc]) continue;
4820     i = owners[proc];
4821     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4822     k++;
4823   }
4824 
4825   /* receives and sends of j-structure are complete */
4826   /*------------------------------------------------*/
4827   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4828   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4829 
4830   /* send and recv i-structure */
4831   /*---------------------------*/
4832   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4833   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4834 
4835   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4836   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4837   for (proc=0,k=0; proc<size; proc++){
4838     if (!len_s[proc]) continue;
4839     /* form outgoing message for i-structure:
4840          buf_si[0]:                 nrows to be sent
4841                [1:nrows]:           row index (global)
4842                [1:nrows]:           row index (local to the destination process)
4843     */
4844     /*-------------------------------------------*/
4845     nrows = len_si[proc]/2 - 1;
4846     buf_si_i    = buf_si + nrows+1;
4847     buf_si[0]   = nrows;
4848     buf_si_i[0] = 0;
4849     nrows = 0;
4850     for (i=owners[proc]; i<owners[proc+1]; i++){
4851       anzi = ai[i+1] - ai[i];
4852       if (anzi) {
4853         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4854         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4855         nrows++;
4856       }
4857     }
4858     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4859     k++;
4860     buf_si += len_si[proc];
4861   }
4862 
4863   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4864   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4865 
4866   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4867   for (i=0; i<merge->nrecv; i++){
4868     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4869   }
4870 
4871   ierr = PetscFree(len_si);CHKERRQ(ierr);
4872   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4873   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4874   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4875   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4876   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4877   ierr = PetscFree(status);CHKERRQ(ierr);
4878 
4879   /* compute a local seq matrix in each processor */
4880   /*----------------------------------------------*/
4881   /* allocate bi array and free space for accumulating nonzero column info */
4882   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4883   bi[0] = 0;
4884 
4885   /* create and initialize a linked list */
4886   nlnk = N+1;
4887   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4888 
4889   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4890   len  = ai[owners[rank+1]] - ai[owners[rank]];
4891   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4892   current_space = free_space;
4893 
4894   /* determine symbolic info for each local row */
4895   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4896 
4897   for (k=0; k<merge->nrecv; k++){
4898     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4899     nrows = *buf_ri_k[k];
4900     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4901     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4902   }
4903 
4904   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4905   len = 0;
4906   for (i=0;i<m;i++) {
4907     bnzi   = 0;
4908     /* add local non-zero cols of this proc's seqmat into lnk */
4909     arow   = owners[rank] + i;
4910     anzi   = ai[arow+1] - ai[arow];
4911     aj     = a->j + ai[arow];
4912     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4913     bnzi += nlnk;
4914     /* add received col data into lnk */
4915     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4916       if (i == *nextrow[k]) { /* i-th row */
4917         anzi = *(nextai[k]+1) - *nextai[k];
4918         aj   = buf_rj[k] + *nextai[k];
4919         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4920         bnzi += nlnk;
4921         nextrow[k]++; nextai[k]++;
4922       }
4923     }
4924     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4925 
4926     /* if free space is not available, make more free space */
4927     if (current_space->local_remaining<bnzi) {
4928       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4929       nspacedouble++;
4930     }
4931     /* copy data into free space, then initialize lnk */
4932     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4933     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4934 
4935     current_space->array           += bnzi;
4936     current_space->local_used      += bnzi;
4937     current_space->local_remaining -= bnzi;
4938 
4939     bi[i+1] = bi[i] + bnzi;
4940   }
4941 
4942   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4943 
4944   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4945   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4946   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4947 
4948   /* create symbolic parallel matrix B_mpi */
4949   /*---------------------------------------*/
4950   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4951   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4952   if (n==PETSC_DECIDE) {
4953     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4954   } else {
4955     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4956   }
4957   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4958   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4959   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4960   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4961   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4962 
4963   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4964   B_mpi->assembled     = PETSC_FALSE;
4965   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4966   merge->bi            = bi;
4967   merge->bj            = bj;
4968   merge->buf_ri        = buf_ri;
4969   merge->buf_rj        = buf_rj;
4970   merge->coi           = PETSC_NULL;
4971   merge->coj           = PETSC_NULL;
4972   merge->owners_co     = PETSC_NULL;
4973 
4974   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4975 
4976   /* attach the supporting struct to B_mpi for reuse */
4977   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4978   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4979   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4980   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4981   *mpimat = B_mpi;
4982 
4983   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4984   PetscFunctionReturn(0);
4985 }
4986 
4987 #undef __FUNCT__
4988 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4989 /*@C
4990       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4991                  matrices from each processor
4992 
4993     Collective on MPI_Comm
4994 
4995    Input Parameters:
4996 +    comm - the communicator the parallel matrix will live on
4997 .    seqmat - the input sequential matrix (one per process)
4998 .    m - number of local rows (or PETSC_DECIDE)
4999 .    n - number of local columns (or PETSC_DECIDE)
5000 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5001 
5002    Output Parameter:
5003 .    mpimat - the parallel matrix generated
5004 
5005     Level: advanced
5006 
5007    Notes:
5008      The dimensions of the sequential matrix in each processor MUST be the same.
5009      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5010      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
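
        A minimal usage sketch (hypothetical names), assuming every process holds a SeqAIJ matrix
        Alocal with identical global dimensions:

     .vb
          Mat C;
          ierr = MatCreateMPIAIJSumSeqAIJ(comm,Alocal,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     .ve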
5011 @*/
5012 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5013 {
5014   PetscErrorCode   ierr;
5015   PetscMPIInt     size;
5016 
5017   PetscFunctionBegin;
5018   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5019   if (size == 1){
5020      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5021      if (scall == MAT_INITIAL_MATRIX){
5022        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5023      } else {
5024        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5025      }
5026      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5027      PetscFunctionReturn(0);
5028   }
5029   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5030   if (scall == MAT_INITIAL_MATRIX){
5031     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5032   }
5033   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5034   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5035   PetscFunctionReturn(0);
5036 }
5037 
5038 #undef __FUNCT__
5039 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5040 /*@
5041      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5042           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5043           with MatGetSize()
5044 
5045     Not Collective
5046 
5047    Input Parameters:
5048 +    A - the matrix
5049 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 
5051    Output Parameter:
5052 .    A_loc - the local sequential matrix generated
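
        A minimal usage sketch (hypothetical names), assuming A is of type MPIAIJ; with
        MAT_INITIAL_MATRIX the returned sequential matrix belongs to the caller and must be destroyed:

     .vb
          Mat A_loc;
          ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
          /* ... use A_loc ... */
          ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
     .ve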
5053 
5054     Level: developer
5055 
5056 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5057 
5058 @*/
5059 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5060 {
5061   PetscErrorCode  ierr;
5062   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5063   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5064   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5065   MatScalar       *aa=a->a,*ba=b->a,*cam;
5066   PetscScalar     *ca;
5067   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5068   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5069   PetscBool       match;
5070 
5071   PetscFunctionBegin;
5072   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5073   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5074   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5075   if (scall == MAT_INITIAL_MATRIX){
5076     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5077     ci[0] = 0;
5078     for (i=0; i<am; i++){
5079       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5080     }
5081     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5082     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5083     k = 0;
5084     for (i=0; i<am; i++) {
5085       ncols_o = bi[i+1] - bi[i];
5086       ncols_d = ai[i+1] - ai[i];
5087       /* off-diagonal portion of A */
5088       for (jo=0; jo<ncols_o; jo++) {
5089         col = cmap[*bj];
5090         if (col >= cstart) break;
5091         cj[k]   = col; bj++;
5092         ca[k++] = *ba++;
5093       }
5094       /* diagonal portion of A */
5095       for (j=0; j<ncols_d; j++) {
5096         cj[k]   = cstart + *aj++;
5097         ca[k++] = *aa++;
5098       }
5099       /* off-diagonal portion of A */
5100       for (j=jo; j<ncols_o; j++) {
5101         cj[k]   = cmap[*bj++];
5102         ca[k++] = *ba++;
5103       }
5104     }
5105     /* put together the new matrix */
5106     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5107     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5108     /* Since these are PETSc arrays, change flags to free them as necessary. */
5109     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5110     mat->free_a  = PETSC_TRUE;
5111     mat->free_ij = PETSC_TRUE;
5112     mat->nonew   = 0;
5113   } else if (scall == MAT_REUSE_MATRIX){
5114     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5115     ci = mat->i; cj = mat->j; cam = mat->a;
5116     for (i=0; i<am; i++) {
5117       /* off-diagonal portion of A */
5118       ncols_o = bi[i+1] - bi[i];
5119       for (jo=0; jo<ncols_o; jo++) {
5120         col = cmap[*bj];
5121         if (col >= cstart) break;
5122         *cam++ = *ba++; bj++;
5123       }
5124       /* diagonal portion of A */
5125       ncols_d = ai[i+1] - ai[i];
5126       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5127       /* off-diagonal portion of A */
5128       for (j=jo; j<ncols_o; j++) {
5129         *cam++ = *ba++; bj++;
5130       }
5131     }
5132   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5133   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5134   PetscFunctionReturn(0);
5135 }
5136 
5137 #undef __FUNCT__
5138 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5139 /*@C
5140      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5141 
5142     Not Collective
5143 
5144    Input Parameters:
5145 +    A - the matrix
5146 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5147 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5148 
5149    Output Parameter:
5150 .    A_loc - the local sequential matrix generated
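
        A minimal usage sketch (hypothetical names), passing PETSC_NULL for row and col so that all
        local rows and all nonzero columns are taken:

     .vb
          Mat A_loc;
          ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     .ve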
5151 
5152     Level: developer
5153 
5154 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5155 
5156 @*/
5157 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5158 {
5159   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5160   PetscErrorCode    ierr;
5161   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5162   IS                isrowa,iscola;
5163   Mat               *aloc;
5164   PetscBool       match;
5165 
5166   PetscFunctionBegin;
5167   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5168   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5169   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5170   if (!row){
5171     start = A->rmap->rstart; end = A->rmap->rend;
5172     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5173   } else {
5174     isrowa = *row;
5175   }
5176   if (!col){
5177     start = A->cmap->rstart;
5178     cmap  = a->garray;
5179     nzA   = a->A->cmap->n;
5180     nzB   = a->B->cmap->n;
5181     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5182     ncols = 0;
5183     for (i=0; i<nzB; i++) {
5184       if (cmap[i] < start) idx[ncols++] = cmap[i];
5185       else break;
5186     }
5187     imark = i;
5188     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5189     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5190     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5191   } else {
5192     iscola = *col;
5193   }
5194   if (scall != MAT_INITIAL_MATRIX){
5195     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5196     aloc[0] = *A_loc;
5197   }
5198   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5199   *A_loc = aloc[0];
5200   ierr = PetscFree(aloc);CHKERRQ(ierr);
5201   if (!row){
5202     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5203   }
5204   if (!col){
5205     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5206   }
5207   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5208   PetscFunctionReturn(0);
5209 }
5210 
5211 #undef __FUNCT__
5212 #define __FUNCT__ "MatGetBrowsOfAcols"
5213 /*@C
5214     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5215 
5216     Collective on Mat
5217 
5218    Input Parameters:
5219 +    A,B - the matrices in mpiaij format
5220 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5221 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5222 
5223    Output Parameter:
5224 +    rowb, colb - index sets of rows and columns of B to extract
5225 -    B_seq - the sequential matrix generated
5226 
5227     Level: developer
5228 
5229 @*/
5230 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5231 {
5232   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5233   PetscErrorCode    ierr;
5234   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5235   IS                isrowb,iscolb;
5236   Mat               *bseq=PETSC_NULL;
5237 
5238   PetscFunctionBegin;
5239   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5240     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5241   }
5242   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5243 
5244   if (scall == MAT_INITIAL_MATRIX){
5245     start = A->cmap->rstart;
5246     cmap  = a->garray;
5247     nzA   = a->A->cmap->n;
5248     nzB   = a->B->cmap->n;
5249     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5250     ncols = 0;
5251     for (i=0; i<nzB; i++) {  /* row < local row index */
5252       if (cmap[i] < start) idx[ncols++] = cmap[i];
5253       else break;
5254     }
5255     imark = i;
5256     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5257     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns of A (rows of B) above the local column range */
5258     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5259     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5260   } else {
5261     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5262     isrowb = *rowb; iscolb = *colb;
5263     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5264     bseq[0] = *B_seq;
5265   }
5266   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5267   *B_seq = bseq[0];
5268   ierr = PetscFree(bseq);CHKERRQ(ierr);
5269   if (!rowb){
5270     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5271   } else {
5272     *rowb = isrowb;
5273   }
5274   if (!colb){
5275     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5276   } else {
5277     *colb = iscolb;
5278   }
5279   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5280   PetscFunctionReturn(0);
5281 }
5282 
5283 #undef __FUNCT__
5284 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5285 /*
5286     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5287     of the OFF-DIAGONAL portion of the local part of A
5288 
5289     Collective on Mat
5290 
5291    Input Parameters:
5292 +    A,B - the matrices in mpiaij format
5293 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5294 
5295    Output Parameters:
5296 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5297 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5298 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5299 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5300 
5301     Level: developer
5302 
5303 */
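/*
   A minimal usage sketch, assuming A and B are existing, compatible MPIAIJ matrices:
   the communication buffers are kept so that a later MAT_REUSE_MATRIX call only
   re-communicates the numerical values of B.

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     .... the values of B change, its nonzero structure does not ....
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/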
5304 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5305 {
5306   VecScatter_MPI_General *gen_to,*gen_from;
5307   PetscErrorCode         ierr;
5308   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5309   Mat_SeqAIJ             *b_oth;
5310   VecScatter             ctx=a->Mvctx;
5311   MPI_Comm               comm=((PetscObject)ctx)->comm;
5312   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5313   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5314   PetscScalar            *rvalues,*svalues;
5315   MatScalar              *b_otha,*bufa,*bufA;
5316   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5317   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5318   MPI_Status             *sstatus,rstatus;
5319   PetscMPIInt            jj;
5320   PetscInt               *cols,sbs,rbs;
5321   PetscScalar            *vals;
5322 
5323   PetscFunctionBegin;
5324   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5325     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5326   }
5327   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5328   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5329 
5330   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5331   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5332   rvalues  = gen_from->values; /* reused as a buffer for the lengths of the rows to be received */
5333   svalues  = gen_to->values;   /* reused as a buffer for the lengths of the rows to be sent */
5334   nrecvs   = gen_from->n;
5335   nsends   = gen_to->n;
5336 
5337   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5338   srow     = gen_to->indices;   /* local row index to be sent */
5339   sstarts  = gen_to->starts;
5340   sprocs   = gen_to->procs;
5341   sstatus  = gen_to->sstatus;
5342   sbs      = gen_to->bs;
5343   rstarts  = gen_from->starts;
5344   rprocs   = gen_from->procs;
5345   rbs      = gen_from->bs;
5346 
5347   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5348   if (scall == MAT_INITIAL_MATRIX){
5349     /* i-array */
5350     /*---------*/
5351     /*  post receives */
5352     for (i=0; i<nrecvs; i++){
5353       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5354       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5355       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356     }
5357 
5358     /* pack the outgoing message */
5359     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5360     sstartsj[0] = 0;  rstartsj[0] = 0;
5361     len = 0; /* total length of j or a array to be sent */
5362     k = 0;
5363     for (i=0; i<nsends; i++){
5364       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5365       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5366       for (j=0; j<nrows; j++) {
5367         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5368         for (l=0; l<sbs; l++){
5369           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5370           rowlen[j*sbs+l] = ncols;
5371           len += ncols;
5372           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5373         }
5374         k++;
5375       }
5376       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5377       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5378     }
5379     /* recvs and sends of i-array are completed */
5380     i = nrecvs;
5381     while (i--) {
5382       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5383     }
5384     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5385 
5386     /* allocate buffers for sending j and a arrays */
5387     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5388     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5389 
5390     /* create i-array of B_oth */
5391     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5392     b_othi[0] = 0;
5393     len = 0; /* total length of j or a array to be received */
5394     k = 0;
5395     for (i=0; i<nrecvs; i++){
5396       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5397       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5398       for (j=0; j<nrows; j++) {
5399         b_othi[k+1] = b_othi[k] + rowlen[j];
5400         len += rowlen[j]; k++;
5401       }
5402       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5403     }
5404 
5405     /* allocate space for j and a arrays of B_oth */
5406     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5407     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5408 
5409     /* j-array */
5410     /*---------*/
5411     /*  post receives of j-array */
5412     for (i=0; i<nrecvs; i++){
5413       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5414       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5415     }
5416 
5417     /* pack the outgoing message j-array */
5418     k = 0;
5419     for (i=0; i<nsends; i++){
5420       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5421       bufJ = bufj+sstartsj[i];
5422       for (j=0; j<nrows; j++) {
5423         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5424         for (ll=0; ll<sbs; ll++){
5425           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5426           for (l=0; l<ncols; l++){
5427             *bufJ++ = cols[l];
5428           }
5429           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5430         }
5431       }
5432       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5433     }
5434 
5435     /* recvs and sends of j-array are completed */
5436     i = nrecvs;
5437     while (i--) {
5438       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5439     }
5440     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5441   } else if (scall == MAT_REUSE_MATRIX){
5442     sstartsj = *startsj_s;
5443     rstartsj = *startsj_r;
5444     bufa     = *bufa_ptr;
5445     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5446     b_otha   = b_oth->a;
5447   } else {
5448     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5449   }
5450 
5451   /* a-array */
5452   /*---------*/
5453   /*  post receives of a-array */
5454   for (i=0; i<nrecvs; i++){
5455     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5456     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5457   }
5458 
5459   /* pack the outgoing message a-array */
5460   k = 0;
5461   for (i=0; i<nsends; i++){
5462     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5463     bufA = bufa+sstartsj[i];
5464     for (j=0; j<nrows; j++) {
5465       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5466       for (ll=0; ll<sbs; ll++){
5467         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5468         for (l=0; l<ncols; l++){
5469           *bufA++ = vals[l];
5470         }
5471         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5472       }
5473     }
5474     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5475   }
5476   /* recvs and sends of a-array are completed */
5477   i = nrecvs;
5478   while (i--) {
5479     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5480   }
5481   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5482   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5483 
5484   if (scall == MAT_INITIAL_MATRIX){
5485     /* put together the new matrix */
5486     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5487 
5488     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5489     /* Since these are PETSc arrays, change flags to free them as necessary. */
5490     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5491     b_oth->free_a  = PETSC_TRUE;
5492     b_oth->free_ij = PETSC_TRUE;
5493     b_oth->nonew   = 0;
5494 
5495     ierr = PetscFree(bufj);CHKERRQ(ierr);
5496     if (!startsj_s || !bufa_ptr){
5497       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5498       ierr = PetscFree(bufa);CHKERRQ(ierr); /* send buffer is not saved for reuse, so free it here */
5499     } else {
5500       *startsj_s = sstartsj;
5501       *startsj_r = rstartsj;
5502       *bufa_ptr  = bufa;
5503     }
5504   }
5505   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5506   PetscFunctionReturn(0);
5507 }
5508 
5509 #undef __FUNCT__
5510 #define __FUNCT__ "MatGetCommunicationStructs"
5511 /*@C
5512   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5513 
5514   Not Collective
5515 
5516   Input Parameter:
5517 . A - The matrix in mpiaij format
5518 
5519   Output Parameters:
5520 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5521 . colmap - A map from global column index to local index into lvec
5522 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5523 
5524   Level: developer
5525 
5526 @*/
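/*
   A minimal usage sketch, assuming A is an assembled MPIAIJ matrix; the returned
   objects are owned by the matrix and must not be destroyed by the caller.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/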
5527 #if defined (PETSC_USE_CTABLE)
5528 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5529 #else
5530 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5531 #endif
5532 {
5533   Mat_MPIAIJ *a;
5534 
5535   PetscFunctionBegin;
5536   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5537   PetscValidPointer(lvec, 2);
5538   PetscValidPointer(colmap, 3);
5539   PetscValidPointer(multScatter, 4);
5540   a = (Mat_MPIAIJ *) A->data;
5541   if (lvec) *lvec = a->lvec;
5542   if (colmap) *colmap = a->colmap;
5543   if (multScatter) *multScatter = a->Mvctx;
5544   PetscFunctionReturn(0);
5545 }
5546 
5547 EXTERN_C_BEGIN
5548 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5549 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5550 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5551 EXTERN_C_END
5552 
5553 #undef __FUNCT__
5554 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5555 /*
5556     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5557 
5558                n                       p                          p
5559         (              )       (              )         (                  )
5560       m (      A       )  *  n (       B      )   =   m (         C        )
5561         (              )       (              )         (                  )
5562 
5563 */
5564 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5565 {
5566   PetscErrorCode     ierr;
5567   Mat                At,Bt,Ct;
5568 
5569   PetscFunctionBegin;
5570   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5571   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5572   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5573   ierr = MatDestroy(&At);CHKERRQ(ierr);
5574   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5575   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5576   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5577   PetscFunctionReturn(0);
5578 }
5579 
5580 #undef __FUNCT__
5581 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5582 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5583 {
5584   PetscErrorCode ierr;
5585   PetscInt       m=A->rmap->n,n=B->cmap->n;
5586   Mat            Cmat;
5587 
5588   PetscFunctionBegin;
5589   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5590   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5591   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5592   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5593   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5594   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5595   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5596   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5597 
5598   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5599   *C = Cmat;
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 /* ----------------------------------------------------------------*/
5604 #undef __FUNCT__
5605 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5606 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5607 {
5608   PetscErrorCode ierr;
5609 
5610   PetscFunctionBegin;
5611   if (scall == MAT_INITIAL_MATRIX){
5612     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5613   }
5614   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
5617 
5618 EXTERN_C_BEGIN
5619 #if defined(PETSC_HAVE_MUMPS)
5620 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5621 #endif
5622 #if defined(PETSC_HAVE_PASTIX)
5623 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5624 #endif
5625 #if defined(PETSC_HAVE_SUPERLU_DIST)
5626 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5627 #endif
5628 #if defined(PETSC_HAVE_CLIQUE)
5629 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5630 #endif
5631 EXTERN_C_END
5632 
5633 /*MC
5634    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5635 
5636    Options Database Keys:
5637 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5638 
5639   Level: beginner
5640 
5641 .seealso: MatCreateAIJ()
5642 M*/
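/*
   A minimal usage sketch, assuming M, N, d_nz and o_nz are chosen by the caller: the
   usual way to create an MPIAIJ matrix, either by setting the type explicitly or
   through the options database with -mat_type mpiaij.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/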
5643 
5644 EXTERN_C_BEGIN
5645 #undef __FUNCT__
5646 #define __FUNCT__ "MatCreate_MPIAIJ"
5647 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5648 {
5649   Mat_MPIAIJ     *b;
5650   PetscErrorCode ierr;
5651   PetscMPIInt    size;
5652 
5653   PetscFunctionBegin;
5654   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5655   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5656   B->data         = (void*)b;
5657   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5658   B->assembled    = PETSC_FALSE;
5659   B->insertmode   = NOT_SET_VALUES;
5660   b->size         = size;
5661   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5662 
5663   /* build cache for off array entries formed */
5664   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5665   b->donotstash  = PETSC_FALSE;
5666   b->colmap      = 0;
5667   b->garray      = 0;
5668   b->roworiented = PETSC_TRUE;
5669 
5670   /* stuff used for matrix vector multiply */
5671   b->lvec      = PETSC_NULL;
5672   b->Mvctx     = PETSC_NULL;
5673 
5674   /* stuff for MatGetRow() */
5675   b->rowindices   = 0;
5676   b->rowvalues    = 0;
5677   b->getrowactive = PETSC_FALSE;
5678 
5679   /* flexible pointer used in CUSP/CUSPARSE classes */
5680   b->spptr        = PETSC_NULL;
5681 
5682 #if defined(PETSC_HAVE_MUMPS)
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5684                                      "MatGetFactor_aij_mumps",
5685                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5686 #endif
5687 #if defined(PETSC_HAVE_PASTIX)
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5689 					   "MatGetFactor_mpiaij_pastix",
5690 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5691 #endif
5692 #if defined(PETSC_HAVE_SUPERLU_DIST)
5693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5694                                      "MatGetFactor_mpiaij_superlu_dist",
5695                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5696 #endif
5697 #if defined(PETSC_HAVE_CLIQUE)
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5699                                      "MatGetFactor_aij_clique",
5700                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5701 #endif
5702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5703                                      "MatStoreValues_MPIAIJ",
5704                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5706                                      "MatRetrieveValues_MPIAIJ",
5707                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5709 				     "MatGetDiagonalBlock_MPIAIJ",
5710                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5711   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5712 				     "MatIsTranspose_MPIAIJ",
5713 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5714   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5715 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5716 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5717   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5718 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5719 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5720   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5721 				     "MatDiagonalScaleLocal_MPIAIJ",
5722 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5723   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5724                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5725                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5726   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5727                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5728                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5729   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5730                                      "MatConvert_MPIAIJ_MPISBAIJ",
5731                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5732   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5733                                      "MatMatMult_MPIDense_MPIAIJ",
5734                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5735   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5736                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5737                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5738   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5739                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5740                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5741   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5742   PetscFunctionReturn(0);
5743 }
5744 EXTERN_C_END
5745 
5746 #undef __FUNCT__
5747 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5748 /*@
5749      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5750          and "off-diagonal" parts of the matrix in CSR format.
5751 
5752    Collective on MPI_Comm
5753 
5754    Input Parameters:
5755 +  comm - MPI communicator
5756 .  m - number of local rows (Cannot be PETSC_DECIDE)
5757 .  n - This value should be the same as the local size used in creating the
5758        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5759        calculated if N is given). For square matrices n is almost always m.
5760 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5761 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5762 .   i - row indices for "diagonal" portion of matrix
5763 .   j - column indices
5764 .   a - matrix values
5765 .   oi - row indices for "off-diagonal" portion of matrix
5766 .   oj - column indices
5767 -   oa - matrix values
5768 
5769    Output Parameter:
5770 .   mat - the matrix
5771 
5772    Level: advanced
5773 
5774    Notes:
5775        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5776        must free the arrays once the matrix has been destroyed and not before.
5777 
5778        The i and j indices are 0 based
5779 
5780        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5781 
5782        This sets local rows and cannot be used to set off-processor values.
5783 
5784        You cannot later use MatSetValues() to change values in this matrix.
5785 
5786 .keywords: matrix, aij, compressed row, sparse, parallel
5787 
5788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5789           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5790 @*/
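/*
   A minimal usage sketch with hypothetical data: each process supplies m = 2 local rows
   whose "diagonal" block is a 2x2 CSR with one entry per row (i,j,a) and whose
   "off-diagonal" block is empty (oi all zeros, oj and oa unused); the arrays must remain
   valid until the matrix is destroyed.

     PetscInt    i[3]  = {0,1,2},j[2] = {0,1};
     PetscScalar a[2]  = {1.0,2.0};
     PetscInt    oi[3] = {0,0,0},*oj = PETSC_NULL;
     PetscScalar *oa   = PETSC_NULL;
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/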
5791 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5792 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5793 {
5794   PetscErrorCode ierr;
5795   Mat_MPIAIJ     *maij;
5796 
5797   PetscFunctionBegin;
5798   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5799   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5800   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5801   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5802   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5803   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5804   maij = (Mat_MPIAIJ*) (*mat)->data;
5805   maij->donotstash     = PETSC_TRUE;
5806   (*mat)->preallocated = PETSC_TRUE;
5807 
5808   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5809   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5810 
5811   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5812   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5813 
5814   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5815   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5816   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5817   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5818 
5819   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5820   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5821   PetscFunctionReturn(0);
5822 }
5823 
5824 /*
5825     Special version for direct calls from Fortran
5826 */
5827 #include <petsc-private/fortranimpl.h>
5828 
5829 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5830 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5831 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5832 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5833 #endif
5834 
5835 /* Change these macros so they can be used in a void function */
5836 #undef CHKERRQ
5837 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5838 #undef SETERRQ2
5839 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5840 #undef SETERRQ3
5841 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5842 #undef SETERRQ
5843 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5844 
5845 EXTERN_C_BEGIN
5846 #undef __FUNCT__
5847 #define __FUNCT__ "matsetvaluesmpiaij_"
5848 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5849 {
5850   Mat             mat = *mmat;
5851   PetscInt        m = *mm, n = *mn;
5852   InsertMode      addv = *maddv;
5853   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5854   PetscScalar     value;
5855   PetscErrorCode  ierr;
5856 
5857   MatCheckPreallocated(mat,1);
5858   if (mat->insertmode == NOT_SET_VALUES) {
5859     mat->insertmode = addv;
5860   }
5861 #if defined(PETSC_USE_DEBUG)
5862   else if (mat->insertmode != addv) {
5863     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5864   }
5865 #endif
5866   {
5867   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5868   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5869   PetscBool       roworiented = aij->roworiented;
5870 
5871   /* Some Variables required in the macro */
5872   Mat             A = aij->A;
5873   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5874   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5875   MatScalar       *aa = a->a;
5876   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5877   Mat             B = aij->B;
5878   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5879   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5880   MatScalar       *ba = b->a;
5881 
5882   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5883   PetscInt        nonew = a->nonew;
5884   MatScalar       *ap1,*ap2;
5885 
5886   PetscFunctionBegin;
5887   for (i=0; i<m; i++) {
5888     if (im[i] < 0) continue;
5889 #if defined(PETSC_USE_DEBUG)
5890     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5891 #endif
5892     if (im[i] >= rstart && im[i] < rend) {
5893       row      = im[i] - rstart;
5894       lastcol1 = -1;
5895       rp1      = aj + ai[row];
5896       ap1      = aa + ai[row];
5897       rmax1    = aimax[row];
5898       nrow1    = ailen[row];
5899       low1     = 0;
5900       high1    = nrow1;
5901       lastcol2 = -1;
5902       rp2      = bj + bi[row];
5903       ap2      = ba + bi[row];
5904       rmax2    = bimax[row];
5905       nrow2    = bilen[row];
5906       low2     = 0;
5907       high2    = nrow2;
5908 
5909       for (j=0; j<n; j++) {
5910         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5911         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5912         if (in[j] >= cstart && in[j] < cend){
5913           col = in[j] - cstart;
5914           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5915         } else if (in[j] < 0) continue;
5916 #if defined(PETSC_USE_DEBUG)
5917         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5918 #endif
5919         else {
5920           if (mat->was_assembled) {
5921             if (!aij->colmap) {
5922               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5923             }
5924 #if defined (PETSC_USE_CTABLE)
5925             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5926 	    col--;
5927 #else
5928             col = aij->colmap[in[j]] - 1;
5929 #endif
5930             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5931               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5932               col =  in[j];
5933               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5934               B = aij->B;
5935               b = (Mat_SeqAIJ*)B->data;
5936               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5937               rp2      = bj + bi[row];
5938               ap2      = ba + bi[row];
5939               rmax2    = bimax[row];
5940               nrow2    = bilen[row];
5941               low2     = 0;
5942               high2    = nrow2;
5943               bm       = aij->B->rmap->n;
5944               ba = b->a;
5945             }
5946           } else col = in[j];
5947           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5948         }
5949       }
5950     } else {
5951       if (!aij->donotstash) {
5952         if (roworiented) {
5953           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5954         } else {
5955           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5956         }
5957       }
5958     }
5959   }}
5960   PetscFunctionReturnVoid();
5961 }
5962 EXTERN_C_END
5963 
5964