xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e9e74f117c621b96e09c148470af0042e5cc1ff8)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine number diagonal and off-diagonal counts */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine number diagonal and off-diagonal counts */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it it is not scalable (each processor
353 has an order N integer array but is fast to acess.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselfs, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal allocating
865      new memory, thus calling lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: symmetric diagonal block */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217    PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* )th processor needs space a largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1743   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1744   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1746 
1747   /* Invert column permutation to find out where my columns should go */
1748   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1749   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1750   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1751   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1752   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1753   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1754   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1755 
1756   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1757   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1758   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1759 
1760   /* Find out where my gcols should go */
1761   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1762   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1763   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1764   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1765   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1766   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1767   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1768   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1769 
1770   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1771   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1774   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1775   for (i=0; i<m; i++) {
1776     PetscInt row = rdest[i],rowner;
1777     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1778     for (j=ai[i]; j<ai[i+1]; j++) {
1779       PetscInt cowner,col = cdest[aj[j]];
1780       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1781       if (rowner == cowner) dnnz[i]++;
1782       else onnz[i]++;
1783     }
1784     for (j=bi[i]; j<bi[i+1]; j++) {
1785       PetscInt cowner,col = gcdest[bj[j]];
1786       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1787       if (rowner == cowner) dnnz[i]++;
1788       else onnz[i]++;
1789     }
1790   }
1791   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1792   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1793   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1794   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1795   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1796   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1797   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1798 
1799   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1800   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1801   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1802   for (i=0; i<m; i++) {
1803     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1804     PetscInt rowlen;
1805     rowlen = ai[i+1] - ai[i];
1806     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1808     rowlen = bi[i+1] - bi[i];
1809     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1810     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1811   }
1812   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1813   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1814   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1815   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1816   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1817   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1818   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1819   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1820   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1821   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1822   *B = Aperm;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1828 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1829 {
1830   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1831   Mat            A = mat->A,B = mat->B;
1832   PetscErrorCode ierr;
1833   PetscReal      isend[5],irecv[5];
1834 
1835   PetscFunctionBegin;
1836   info->block_size     = 1.0;
1837   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1839   isend[3] = info->memory;  isend[4] = info->mallocs;
1840   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1841   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1842   isend[3] += info->memory;  isend[4] += info->mallocs;
1843   if (flag == MAT_LOCAL) {
1844     info->nz_used      = isend[0];
1845     info->nz_allocated = isend[1];
1846     info->nz_unneeded  = isend[2];
1847     info->memory       = isend[3];
1848     info->mallocs      = isend[4];
1849   } else if (flag == MAT_GLOBAL_MAX) {
1850     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1851     info->nz_used      = irecv[0];
1852     info->nz_allocated = irecv[1];
1853     info->nz_unneeded  = irecv[2];
1854     info->memory       = irecv[3];
1855     info->mallocs      = irecv[4];
1856   } else if (flag == MAT_GLOBAL_SUM) {
1857     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1858     info->nz_used      = irecv[0];
1859     info->nz_allocated = irecv[1];
1860     info->nz_unneeded  = irecv[2];
1861     info->memory       = irecv[3];
1862     info->mallocs      = irecv[4];
1863   }
1864   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1865   info->fill_ratio_needed = 0;
1866   info->factor_mallocs    = 0;
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatSetOption_MPIAIJ"
1872 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1873 {
1874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1875   PetscErrorCode ierr;
1876 
1877   PetscFunctionBegin;
1878   switch (op) {
1879   case MAT_NEW_NONZERO_LOCATIONS:
1880   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1881   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1882   case MAT_KEEP_NONZERO_PATTERN:
1883   case MAT_NEW_NONZERO_LOCATION_ERR:
1884   case MAT_USE_INODES:
1885   case MAT_IGNORE_ZERO_ENTRIES:
1886     MatCheckPreallocated(A,1);
1887     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1888     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1889     break;
1890   case MAT_ROW_ORIENTED:
1891     a->roworiented = flg;
1892     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1893     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1894     break;
1895   case MAT_NEW_DIAGONALS:
1896     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1897     break;
1898   case MAT_IGNORE_OFF_PROC_ENTRIES:
1899     a->donotstash = flg;
1900     break;
1901   case MAT_SPD:
1902     A->spd_set                         = PETSC_TRUE;
1903     A->spd                             = flg;
1904     if (flg) {
1905       A->symmetric                     = PETSC_TRUE;
1906       A->structurally_symmetric        = PETSC_TRUE;
1907       A->symmetric_set                 = PETSC_TRUE;
1908       A->structurally_symmetric_set    = PETSC_TRUE;
1909     }
1910     break;
1911   case MAT_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_STRUCTURALLY_SYMMETRIC:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_HERMITIAN:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   case MAT_SYMMETRY_ETERNAL:
1921     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1922     break;
1923   default:
1924     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatGetRow_MPIAIJ"
1931 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1932 {
1933   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1934   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1935   PetscErrorCode ierr;
1936   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1937   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1938   PetscInt       *cmap,*idx_p;
1939 
1940   PetscFunctionBegin;
1941   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1942   mat->getrowactive = PETSC_TRUE;
1943 
1944   if (!mat->rowvalues && (idx || v)) {
1945     /*
1946         allocate enough space to hold information from the longest row.
1947     */
1948     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1949     PetscInt   max = 1,tmp;
1950     for (i=0; i<matin->rmap->n; i++) {
1951       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1952       if (max < tmp) { max = tmp; }
1953     }
1954     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1955   }
1956 
1957   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1958   lrow = row - rstart;
1959 
1960   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1961   if (!v)   {pvA = 0; pvB = 0;}
1962   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1963   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1964   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1965   nztot = nzA + nzB;
1966 
1967   cmap  = mat->garray;
1968   if (v  || idx) {
1969     if (nztot) {
1970       /* Sort by increasing column numbers, assuming A and B already sorted */
1971       PetscInt imark = -1;
1972       if (v) {
1973         *v = v_p = mat->rowvalues;
1974         for (i=0; i<nzB; i++) {
1975           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1976           else break;
1977         }
1978         imark = i;
1979         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1980         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1981       }
1982       if (idx) {
1983         *idx = idx_p = mat->rowindices;
1984         if (imark > -1) {
1985           for (i=0; i<imark; i++) {
1986             idx_p[i] = cmap[cworkB[i]];
1987           }
1988         } else {
1989           for (i=0; i<nzB; i++) {
1990             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1991             else break;
1992           }
1993           imark = i;
1994         }
1995         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1996         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1997       }
1998     } else {
1999       if (idx) *idx = 0;
2000       if (v)   *v   = 0;
2001     }
2002   }
2003   *nz = nztot;
2004   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2005   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2011 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2012 {
2013   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2014 
2015   PetscFunctionBegin;
2016   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2017   aij->getrowactive = PETSC_FALSE;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatNorm_MPIAIJ"
2023 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2024 {
2025   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2026   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2027   PetscErrorCode ierr;
2028   PetscInt       i,j,cstart = mat->cmap->rstart;
2029   PetscReal      sum = 0.0;
2030   MatScalar      *v;
2031 
2032   PetscFunctionBegin;
2033   if (aij->size == 1) {
2034     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2035   } else {
2036     if (type == NORM_FROBENIUS) {
2037       v = amat->a;
2038       for (i=0; i<amat->nz; i++) {
2039 #if defined(PETSC_USE_COMPLEX)
2040         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2041 #else
2042         sum += (*v)*(*v); v++;
2043 #endif
2044       }
2045       v = bmat->a;
2046       for (i=0; i<bmat->nz; i++) {
2047 #if defined(PETSC_USE_COMPLEX)
2048         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2049 #else
2050         sum += (*v)*(*v); v++;
2051 #endif
2052       }
2053       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2054       *norm = PetscSqrtReal(*norm);
2055     } else if (type == NORM_1) { /* max column norm */
2056       PetscReal *tmp,*tmp2;
2057       PetscInt  *jj,*garray = aij->garray;
2058       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2059       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2060       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2061       *norm = 0.0;
2062       v = amat->a; jj = amat->j;
2063       for (j=0; j<amat->nz; j++) {
2064         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2065       }
2066       v = bmat->a; jj = bmat->j;
2067       for (j=0; j<bmat->nz; j++) {
2068         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2069       }
2070       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2071       for (j=0; j<mat->cmap->N; j++) {
2072         if (tmp2[j] > *norm) *norm = tmp2[j];
2073       }
2074       ierr = PetscFree(tmp);CHKERRQ(ierr);
2075       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2076     } else if (type == NORM_INFINITY) { /* max row norm */
2077       PetscReal ntemp = 0.0;
2078       for (j=0; j<aij->A->rmap->n; j++) {
2079         v = amat->a + amat->i[j];
2080         sum = 0.0;
2081         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2082           sum += PetscAbsScalar(*v); v++;
2083         }
2084         v = bmat->a + bmat->i[j];
2085         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2086           sum += PetscAbsScalar(*v); v++;
2087         }
2088         if (sum > ntemp) ntemp = sum;
2089       }
2090       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2091     } else {
2092       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2093     }
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatTranspose_MPIAIJ"
2100 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2101 {
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2104   PetscErrorCode ierr;
2105   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2106   PetscInt       cstart=A->cmap->rstart,ncol;
2107   Mat            B;
2108   MatScalar      *array;
2109 
2110   PetscFunctionBegin;
2111   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2112 
2113   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2114   ai = Aloc->i; aj = Aloc->j;
2115   bi = Bloc->i; bj = Bloc->j;
2116   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2117     PetscInt *d_nnz,*g_nnz,*o_nnz;
2118     PetscSFNode *oloc;
2119     PETSC_UNUSED PetscSF sf;
2120 
2121     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2122     /* compute d_nnz for preallocation */
2123     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2124     for (i=0; i<ai[ma]; i++) {
2125       d_nnz[aj[i]] ++;
2126       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2127     }
2128     /* compute local off-diagonal contributions */
2129     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2131     /* map those to global */
2132     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2133     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2134     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2135     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2136     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2137     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2138     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2139 
2140     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2141     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2142     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2143     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2146   } else {
2147     B = *matout;
2148     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2149     for (i=0; i<ai[ma]; i++){
2150       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2151     }
2152   }
2153 
2154   /* copy over the A part */
2155   array = Aloc->a;
2156   row = A->rmap->rstart;
2157   for (i=0; i<ma; i++) {
2158     ncol = ai[i+1]-ai[i];
2159     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2160     row++; array += ncol; aj += ncol;
2161   }
2162   aj = Aloc->j;
2163   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2164 
2165   /* copy over the B part */
2166   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2167   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2168   array = Bloc->a;
2169   row = A->rmap->rstart;
2170   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2171   cols_tmp = cols;
2172   for (i=0; i<mb; i++) {
2173     ncol = bi[i+1]-bi[i];
2174     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2175     row++; array += ncol; cols_tmp += ncol;
2176   }
2177   ierr = PetscFree(cols);CHKERRQ(ierr);
2178 
2179   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2182     *matout = B;
2183   } else {
2184     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2185   }
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2191 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2192 {
2193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2194   Mat            a = aij->A,b = aij->B;
2195   PetscErrorCode ierr;
2196   PetscInt       s1,s2,s3;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2200   if (rr) {
2201     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2202     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2203     /* Overlap communication with computation. */
2204     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2205   }
2206   if (ll) {
2207     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2208     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2209     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2210   }
2211   /* scale  the diagonal block */
2212   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2213 
2214   if (rr) {
2215     /* Do a scatter end and then right scale the off-diagonal block */
2216     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2217     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2218   }
2219 
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2225 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2226 {
2227   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatEqual_MPIAIJ"
2237 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2238 {
2239   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2240   Mat            a,b,c,d;
2241   PetscBool      flg;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   a = matA->A; b = matA->B;
2246   c = matB->A; d = matB->B;
2247 
2248   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2249   if (flg) {
2250     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2251   }
2252   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatCopy_MPIAIJ"
2258 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2259 {
2260   PetscErrorCode ierr;
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2262   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2263 
2264   PetscFunctionBegin;
2265   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2266   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2267     /* because of the column compression in the off-processor part of the matrix a->B,
2268        the number of columns in a->B and b->B may be different, hence we cannot call
2269        the MatCopy() directly on the two parts. If need be, we can provide a more
2270        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2271        then copying the submatrices */
2272     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2273   } else {
2274     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2275     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatSetUp_MPIAIJ"
2282 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2283 {
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2293 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2294 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2295 {
2296   PetscInt          i,m=Y->rmap->N;
2297   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2298   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2299   const PetscInt    *xi = x->i,*yi = y->i;
2300 
2301   PetscFunctionBegin;
2302   /* Set the number of nonzeros in the new matrix */
2303   for (i=0; i<m; i++) {
2304     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2305     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2306     nnz[i] = 0;
2307     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2308       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2309       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2310       nnz[i]++;
2311     }
2312     for (; k<nzy; k++) nnz[i]++;
2313   }
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatAXPY_MPIAIJ"
2319 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2320 {
2321   PetscErrorCode ierr;
2322   PetscInt       i;
2323   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2324   PetscBLASInt   bnz,one=1;
2325   Mat_SeqAIJ     *x,*y;
2326 
2327   PetscFunctionBegin;
2328   if (str == SAME_NONZERO_PATTERN) {
2329     PetscScalar alpha = a;
2330     x = (Mat_SeqAIJ *)xx->A->data;
2331     y = (Mat_SeqAIJ *)yy->A->data;
2332     bnz = PetscBLASIntCast(x->nz);
2333     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2334     x = (Mat_SeqAIJ *)xx->B->data;
2335     y = (Mat_SeqAIJ *)yy->B->data;
2336     bnz = PetscBLASIntCast(x->nz);
2337     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2338   } else if (str == SUBSET_NONZERO_PATTERN) {
2339     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2340 
2341     x = (Mat_SeqAIJ *)xx->B->data;
2342     y = (Mat_SeqAIJ *)yy->B->data;
2343     if (y->xtoy && y->XtoY != xx->B) {
2344       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2345       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2346     }
2347     if (!y->xtoy) { /* get xtoy */
2348       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2349       y->XtoY = xx->B;
2350       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2351     }
2352     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2353   } else {
2354     Mat B;
2355     PetscInt *nnz_d,*nnz_o;
2356     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2357     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2358     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2359     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2360     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2361     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2362     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2363     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2364     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2365     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2366     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2367     ierr = MatHeaderReplace(Y,B);
2368     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2369     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatConjugate_MPIAIJ"
2378 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2379 {
2380 #if defined(PETSC_USE_COMPLEX)
2381   PetscErrorCode ierr;
2382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2386   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2387 #else
2388   PetscFunctionBegin;
2389 #endif
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatRealPart_MPIAIJ"
2395 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2396 {
2397   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2402   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2408 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2409 {
2410   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2415   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #ifdef PETSC_HAVE_PBGL
2420 
2421 #include <boost/parallel/mpi/bsp_process_group.hpp>
2422 #include <boost/graph/distributed/ilu_default_graph.hpp>
2423 #include <boost/graph/distributed/ilu_0_block.hpp>
2424 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2425 #include <boost/graph/distributed/petsc/interface.hpp>
2426 #include <boost/multi_array.hpp>
2427 #include <boost/parallel/distributed_property_map->hpp>
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2431 /*
2432   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2433 */
2434 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2435 {
2436   namespace petsc = boost::distributed::petsc;
2437 
2438   namespace graph_dist = boost::graph::distributed;
2439   using boost::graph::distributed::ilu_default::process_group_type;
2440   using boost::graph::ilu_permuted;
2441 
2442   PetscBool       row_identity, col_identity;
2443   PetscContainer  c;
2444   PetscInt        m, n, M, N;
2445   PetscErrorCode  ierr;
2446 
2447   PetscFunctionBegin;
2448   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2449   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2450   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2451   if (!row_identity || !col_identity) {
2452     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2453   }
2454 
2455   process_group_type pg;
2456   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2457   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2458   lgraph_type&   level_graph = *lgraph_p;
2459   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2460 
2461   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2462   ilu_permuted(level_graph);
2463 
2464   /* put together the new matrix */
2465   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2466   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2467   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2468   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2469   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2470   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2471   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2473 
2474   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2475   ierr = PetscContainerSetPointer(c, lgraph_p);
2476   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2477   ierr = PetscContainerDestroy(&c);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2483 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2484 {
2485   PetscFunctionBegin;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatSolve_MPIAIJ"
2491 /*
2492   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2493 */
2494 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2495 {
2496   namespace graph_dist = boost::graph::distributed;
2497 
2498   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2499   lgraph_type*   lgraph_p;
2500   PetscContainer c;
2501   PetscErrorCode ierr;
2502 
2503   PetscFunctionBegin;
2504   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2505   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2506   ierr = VecCopy(b, x);CHKERRQ(ierr);
2507 
2508   PetscScalar* array_x;
2509   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2510   PetscInt sx;
2511   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2512 
2513   PetscScalar* array_b;
2514   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2515   PetscInt sb;
2516   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2517 
2518   lgraph_type&   level_graph = *lgraph_p;
2519   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2520 
2521   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2522   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2523                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2524 
2525   typedef boost::iterator_property_map<array_ref_type::iterator,
2526                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2527   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2528                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2529 
2530   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2531 
2532   PetscFunctionReturn(0);
2533 }
2534 #endif
2535 
2536 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2537   PetscInt       nzlocal,nsends,nrecvs;
2538   PetscMPIInt    *send_rank,*recv_rank;
2539   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2540   PetscScalar    *sbuf_a,**rbuf_a;
2541   PetscErrorCode (*Destroy)(Mat);
2542 } Mat_Redundant;
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2546 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2547 {
2548   PetscErrorCode       ierr;
2549   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2550   PetscInt             i;
2551 
2552   PetscFunctionBegin;
2553   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2554   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2555   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2556   for (i=0; i<redund->nrecvs; i++){
2557     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2558     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2559   }
2560   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2561   ierr = PetscFree(redund);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatDestroy_MatRedundant"
2567 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2568 {
2569   PetscErrorCode  ierr;
2570   PetscContainer  container;
2571   Mat_Redundant   *redund=PETSC_NULL;
2572 
2573   PetscFunctionBegin;
2574   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2575   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2576   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2577   A->ops->destroy = redund->Destroy;
2578   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2579   if (A->ops->destroy) {
2580     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2581   }
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2587 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2588 {
2589   PetscMPIInt    rank,size;
2590   MPI_Comm       comm=((PetscObject)mat)->comm;
2591   PetscErrorCode ierr;
2592   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2593   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2594   PetscInt       *rowrange=mat->rmap->range;
2595   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2596   Mat            A=aij->A,B=aij->B,C=*matredundant;
2597   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2598   PetscScalar    *sbuf_a;
2599   PetscInt       nzlocal=a->nz+b->nz;
2600   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2601   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2602   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2603   MatScalar      *aworkA,*aworkB;
2604   PetscScalar    *vals;
2605   PetscMPIInt    tag1,tag2,tag3,imdex;
2606   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2607                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2608   MPI_Status     recv_status,*send_status;
2609   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2610   PetscInt       **rbuf_j=PETSC_NULL;
2611   PetscScalar    **rbuf_a=PETSC_NULL;
2612   Mat_Redundant  *redund=PETSC_NULL;
2613   PetscContainer container;
2614 
2615   PetscFunctionBegin;
2616   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2617   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2618 
2619   if (reuse == MAT_REUSE_MATRIX) {
2620     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2621     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2622     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2623     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2624     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2625     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2626     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2627     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2628 
2629     nsends    = redund->nsends;
2630     nrecvs    = redund->nrecvs;
2631     send_rank = redund->send_rank;
2632     recv_rank = redund->recv_rank;
2633     sbuf_nz   = redund->sbuf_nz;
2634     rbuf_nz   = redund->rbuf_nz;
2635     sbuf_j    = redund->sbuf_j;
2636     sbuf_a    = redund->sbuf_a;
2637     rbuf_j    = redund->rbuf_j;
2638     rbuf_a    = redund->rbuf_a;
2639   }
2640 
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     PetscMPIInt  subrank,subsize;
2643     PetscInt     nleftover,np_subcomm;
2644     /* get the destination processors' id send_rank, nsends and nrecvs */
2645     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2646     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2647     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2648     np_subcomm = size/nsubcomm;
2649     nleftover  = size - nsubcomm*np_subcomm;
2650     nsends = 0; nrecvs = 0;
2651     for (i=0; i<size; i++){ /* i=rank*/
2652       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2653         send_rank[nsends] = i; nsends++;
2654         recv_rank[nrecvs++] = i;
2655       }
2656     }
2657     if (rank >= size - nleftover){/* this proc is a leftover processor */
2658       i = size-nleftover-1;
2659       j = 0;
2660       while (j < nsubcomm - nleftover){
2661         send_rank[nsends++] = i;
2662         i--; j++;
2663       }
2664     }
2665 
2666     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2667       for (i=0; i<nleftover; i++){
2668         recv_rank[nrecvs++] = size-nleftover+i;
2669       }
2670     }
2671 
2672     /* allocate sbuf_j, sbuf_a */
2673     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2674     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2675     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2676   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2677 
2678   /* copy mat's local entries into the buffers */
2679   if (reuse == MAT_INITIAL_MATRIX){
2680     rownz_max = 0;
2681     rptr = sbuf_j;
2682     cols = sbuf_j + rend-rstart + 1;
2683     vals = sbuf_a;
2684     rptr[0] = 0;
2685     for (i=0; i<rend-rstart; i++){
2686       row = i + rstart;
2687       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2688       ncols  = nzA + nzB;
2689       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2690       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2691       /* load the column indices for this row into cols */
2692       lwrite = 0;
2693       for (l=0; l<nzB; l++) {
2694         if ((ctmp = bmap[cworkB[l]]) < cstart){
2695           vals[lwrite]   = aworkB[l];
2696           cols[lwrite++] = ctmp;
2697         }
2698       }
2699       for (l=0; l<nzA; l++){
2700         vals[lwrite]   = aworkA[l];
2701         cols[lwrite++] = cstart + cworkA[l];
2702       }
2703       for (l=0; l<nzB; l++) {
2704         if ((ctmp = bmap[cworkB[l]]) >= cend){
2705           vals[lwrite]   = aworkB[l];
2706           cols[lwrite++] = ctmp;
2707         }
2708       }
2709       vals += ncols;
2710       cols += ncols;
2711       rptr[i+1] = rptr[i] + ncols;
2712       if (rownz_max < ncols) rownz_max = ncols;
2713     }
2714     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2715   } else { /* only copy matrix values into sbuf_a */
2716     rptr = sbuf_j;
2717     vals = sbuf_a;
2718     rptr[0] = 0;
2719     for (i=0; i<rend-rstart; i++){
2720       row = i + rstart;
2721       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2722       ncols  = nzA + nzB;
2723       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2724       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2725       lwrite = 0;
2726       for (l=0; l<nzB; l++) {
2727         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2728       }
2729       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2730       for (l=0; l<nzB; l++) {
2731         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2732       }
2733       vals += ncols;
2734       rptr[i+1] = rptr[i] + ncols;
2735     }
2736   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2737 
2738   /* send nzlocal to others, and recv other's nzlocal */
2739   /*--------------------------------------------------*/
2740   if (reuse == MAT_INITIAL_MATRIX){
2741     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2742     s_waits2 = s_waits3 + nsends;
2743     s_waits1 = s_waits2 + nsends;
2744     r_waits1 = s_waits1 + nsends;
2745     r_waits2 = r_waits1 + nrecvs;
2746     r_waits3 = r_waits2 + nrecvs;
2747   } else {
2748     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2749     r_waits3 = s_waits3 + nsends;
2750   }
2751 
2752   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2753   if (reuse == MAT_INITIAL_MATRIX){
2754     /* get new tags to keep the communication clean */
2755     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2756     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2757     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2758 
2759     /* post receives of other's nzlocal */
2760     for (i=0; i<nrecvs; i++){
2761       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2762     }
2763     /* send nzlocal to others */
2764     for (i=0; i<nsends; i++){
2765       sbuf_nz[i] = nzlocal;
2766       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2767     }
2768     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2769     count = nrecvs;
2770     while (count) {
2771       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2772       recv_rank[imdex] = recv_status.MPI_SOURCE;
2773       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2774       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2775 
2776       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2777       rbuf_nz[imdex] += i + 2;
2778       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2779       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2780       count--;
2781     }
2782     /* wait on sends of nzlocal */
2783     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2784     /* send mat->i,j to others, and recv from other's */
2785     /*------------------------------------------------*/
2786     for (i=0; i<nsends; i++){
2787       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2788       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2789     }
2790     /* wait on receives of mat->i,j */
2791     /*------------------------------*/
2792     count = nrecvs;
2793     while (count) {
2794       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2795       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2796       count--;
2797     }
2798     /* wait on sends of mat->i,j */
2799     /*---------------------------*/
2800     if (nsends) {
2801       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2802     }
2803   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2804 
2805   /* post receives, send and receive mat->a */
2806   /*----------------------------------------*/
2807   for (imdex=0; imdex<nrecvs; imdex++) {
2808     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2809   }
2810   for (i=0; i<nsends; i++){
2811     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2812   }
2813   count = nrecvs;
2814   while (count) {
2815     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2816     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2817     count--;
2818   }
2819   if (nsends) {
2820     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2821   }
2822 
2823   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2824 
2825   /* create redundant matrix */
2826   /*-------------------------*/
2827   if (reuse == MAT_INITIAL_MATRIX){
2828     /* compute rownz_max for preallocation */
2829     for (imdex=0; imdex<nrecvs; imdex++){
2830       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2831       rptr = rbuf_j[imdex];
2832       for (i=0; i<j; i++){
2833         ncols = rptr[i+1] - rptr[i];
2834         if (rownz_max < ncols) rownz_max = ncols;
2835       }
2836     }
2837 
2838     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2839     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2840     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2841     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2842     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2843     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2844   } else {
2845     C = *matredundant;
2846   }
2847 
2848   /* insert local matrix entries */
2849   rptr = sbuf_j;
2850   cols = sbuf_j + rend-rstart + 1;
2851   vals = sbuf_a;
2852   for (i=0; i<rend-rstart; i++){
2853     row   = i + rstart;
2854     ncols = rptr[i+1] - rptr[i];
2855     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856     vals += ncols;
2857     cols += ncols;
2858   }
2859   /* insert received matrix entries */
2860   for (imdex=0; imdex<nrecvs; imdex++){
2861     rstart = rowrange[recv_rank[imdex]];
2862     rend   = rowrange[recv_rank[imdex]+1];
2863     rptr = rbuf_j[imdex];
2864     cols = rbuf_j[imdex] + rend-rstart + 1;
2865     vals = rbuf_a[imdex];
2866     for (i=0; i<rend-rstart; i++){
2867       row   = i + rstart;
2868       ncols = rptr[i+1] - rptr[i];
2869       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2870       vals += ncols;
2871       cols += ncols;
2872     }
2873   }
2874   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2875   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2876   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2877   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2878   if (reuse == MAT_INITIAL_MATRIX) {
2879     PetscContainer container;
2880     *matredundant = C;
2881     /* create a supporting struct and attach it to C for reuse */
2882     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2883     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2884     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2885     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2886     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2887     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2888 
2889     redund->nzlocal = nzlocal;
2890     redund->nsends  = nsends;
2891     redund->nrecvs  = nrecvs;
2892     redund->send_rank = send_rank;
2893     redund->recv_rank = recv_rank;
2894     redund->sbuf_nz = sbuf_nz;
2895     redund->rbuf_nz = rbuf_nz;
2896     redund->sbuf_j  = sbuf_j;
2897     redund->sbuf_a  = sbuf_a;
2898     redund->rbuf_j  = rbuf_j;
2899     redund->rbuf_a  = rbuf_a;
2900 
2901     redund->Destroy = C->ops->destroy;
2902     C->ops->destroy = MatDestroy_MatRedundant;
2903   }
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 #undef __FUNCT__
2908 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2909 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2910 {
2911   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2912   PetscErrorCode ierr;
2913   PetscInt       i,*idxb = 0;
2914   PetscScalar    *va,*vb;
2915   Vec            vtmp;
2916 
2917   PetscFunctionBegin;
2918   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2919   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2920   if (idx) {
2921     for (i=0; i<A->rmap->n; i++) {
2922       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2923     }
2924   }
2925 
2926   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2927   if (idx) {
2928     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2929   }
2930   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2931   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2932 
2933   for (i=0; i<A->rmap->n; i++){
2934     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2935       va[i] = vb[i];
2936       if (idx) idx[i] = a->garray[idxb[i]];
2937     }
2938   }
2939 
2940   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2941   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2942   ierr = PetscFree(idxb);CHKERRQ(ierr);
2943   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2949 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2950 {
2951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2952   PetscErrorCode ierr;
2953   PetscInt       i,*idxb = 0;
2954   PetscScalar    *va,*vb;
2955   Vec            vtmp;
2956 
2957   PetscFunctionBegin;
2958   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2959   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2960   if (idx) {
2961     for (i=0; i<A->cmap->n; i++) {
2962       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2963     }
2964   }
2965 
2966   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2967   if (idx) {
2968     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2969   }
2970   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2971   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2972 
2973   for (i=0; i<A->rmap->n; i++){
2974     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2975       va[i] = vb[i];
2976       if (idx) idx[i] = a->garray[idxb[i]];
2977     }
2978   }
2979 
2980   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2981   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2982   ierr = PetscFree(idxb);CHKERRQ(ierr);
2983   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2989 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2990 {
2991   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2992   PetscInt       n      = A->rmap->n;
2993   PetscInt       cstart = A->cmap->rstart;
2994   PetscInt      *cmap   = mat->garray;
2995   PetscInt      *diagIdx, *offdiagIdx;
2996   Vec            diagV, offdiagV;
2997   PetscScalar   *a, *diagA, *offdiagA;
2998   PetscInt       r;
2999   PetscErrorCode ierr;
3000 
3001   PetscFunctionBegin;
3002   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3003   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
3004   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
3005   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3006   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3007   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3008   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3009   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3010   for (r = 0; r < n; ++r) {
3011     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3012       a[r]   = diagA[r];
3013       idx[r] = cstart + diagIdx[r];
3014     } else {
3015       a[r]   = offdiagA[r];
3016       idx[r] = cmap[offdiagIdx[r]];
3017     }
3018   }
3019   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3020   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3021   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3022   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3023   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3024   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3030 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3031 {
3032   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3033   PetscInt       n      = A->rmap->n;
3034   PetscInt       cstart = A->cmap->rstart;
3035   PetscInt      *cmap   = mat->garray;
3036   PetscInt      *diagIdx, *offdiagIdx;
3037   Vec            diagV, offdiagV;
3038   PetscScalar   *a, *diagA, *offdiagA;
3039   PetscInt       r;
3040   PetscErrorCode ierr;
3041 
3042   PetscFunctionBegin;
3043   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3044   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3045   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3046   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3047   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3048   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3049   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3050   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3051   for (r = 0; r < n; ++r) {
3052     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3053       a[r]   = diagA[r];
3054       idx[r] = cstart + diagIdx[r];
3055     } else {
3056       a[r]   = offdiagA[r];
3057       idx[r] = cmap[offdiagIdx[r]];
3058     }
3059   }
3060   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3061   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3062   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3063   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3064   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3065   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3071 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3072 {
3073   PetscErrorCode ierr;
3074   Mat            *dummy;
3075 
3076   PetscFunctionBegin;
3077   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3078   *newmat = *dummy;
3079   ierr = PetscFree(dummy);CHKERRQ(ierr);
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3084 
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3087 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3088 {
3089   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3090   PetscErrorCode ierr;
3091 
3092   PetscFunctionBegin;
3093   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 #undef __FUNCT__
3098 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3099 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3100 {
3101   PetscErrorCode ierr;
3102   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3103 
3104   PetscFunctionBegin;
3105   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3106   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3107   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3108   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   PetscFunctionReturn(0);
3110 }
3111 
3112 /* -------------------------------------------------------------------*/
3113 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3114        MatGetRow_MPIAIJ,
3115        MatRestoreRow_MPIAIJ,
3116        MatMult_MPIAIJ,
3117 /* 4*/ MatMultAdd_MPIAIJ,
3118        MatMultTranspose_MPIAIJ,
3119        MatMultTransposeAdd_MPIAIJ,
3120 #ifdef PETSC_HAVE_PBGL
3121        MatSolve_MPIAIJ,
3122 #else
3123        0,
3124 #endif
3125        0,
3126        0,
3127 /*10*/ 0,
3128        0,
3129        0,
3130        MatSOR_MPIAIJ,
3131        MatTranspose_MPIAIJ,
3132 /*15*/ MatGetInfo_MPIAIJ,
3133        MatEqual_MPIAIJ,
3134        MatGetDiagonal_MPIAIJ,
3135        MatDiagonalScale_MPIAIJ,
3136        MatNorm_MPIAIJ,
3137 /*20*/ MatAssemblyBegin_MPIAIJ,
3138        MatAssemblyEnd_MPIAIJ,
3139        MatSetOption_MPIAIJ,
3140        MatZeroEntries_MPIAIJ,
3141 /*24*/ MatZeroRows_MPIAIJ,
3142        0,
3143 #ifdef PETSC_HAVE_PBGL
3144        0,
3145 #else
3146        0,
3147 #endif
3148        0,
3149        0,
3150 /*29*/ MatSetUp_MPIAIJ,
3151 #ifdef PETSC_HAVE_PBGL
3152        0,
3153 #else
3154        0,
3155 #endif
3156        0,
3157        0,
3158        0,
3159 /*34*/ MatDuplicate_MPIAIJ,
3160        0,
3161        0,
3162        0,
3163        0,
3164 /*39*/ MatAXPY_MPIAIJ,
3165        MatGetSubMatrices_MPIAIJ,
3166        MatIncreaseOverlap_MPIAIJ,
3167        MatGetValues_MPIAIJ,
3168        MatCopy_MPIAIJ,
3169 /*44*/ MatGetRowMax_MPIAIJ,
3170        MatScale_MPIAIJ,
3171        0,
3172        0,
3173        MatZeroRowsColumns_MPIAIJ,
3174 /*49*/ MatSetRandom_MPIAIJ,
3175        0,
3176        0,
3177        0,
3178        0,
3179 /*54*/ MatFDColoringCreate_MPIAIJ,
3180        0,
3181        MatSetUnfactored_MPIAIJ,
3182        MatPermute_MPIAIJ,
3183        0,
3184 /*59*/ MatGetSubMatrix_MPIAIJ,
3185        MatDestroy_MPIAIJ,
3186        MatView_MPIAIJ,
3187        0,
3188        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3189 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3190        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3191        0,
3192        0,
3193        0,
3194 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3195        MatGetRowMinAbs_MPIAIJ,
3196        0,
3197        MatSetColoring_MPIAIJ,
3198 #if defined(PETSC_HAVE_ADIC)
3199        MatSetValuesAdic_MPIAIJ,
3200 #else
3201        0,
3202 #endif
3203        MatSetValuesAdifor_MPIAIJ,
3204 /*75*/ MatFDColoringApply_AIJ,
3205        0,
3206        0,
3207        0,
3208        MatFindZeroDiagonals_MPIAIJ,
3209 /*80*/ 0,
3210        0,
3211        0,
3212 /*83*/ MatLoad_MPIAIJ,
3213        0,
3214        0,
3215        0,
3216        0,
3217        0,
3218 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3219        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3220        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3221        MatPtAP_MPIAIJ_MPIAIJ,
3222        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3223 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3224        0,
3225        0,
3226        0,
3227        0,
3228 /*99*/ 0,
3229        0,
3230        0,
3231        MatConjugate_MPIAIJ,
3232        0,
3233 /*104*/MatSetValuesRow_MPIAIJ,
3234        MatRealPart_MPIAIJ,
3235        MatImaginaryPart_MPIAIJ,
3236        0,
3237        0,
3238 /*109*/0,
3239        MatGetRedundantMatrix_MPIAIJ,
3240        MatGetRowMin_MPIAIJ,
3241        0,
3242        0,
3243 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3244        0,
3245        0,
3246        0,
3247        0,
3248 /*119*/0,
3249        0,
3250        0,
3251        0,
3252        MatGetMultiProcBlock_MPIAIJ,
3253 /*124*/MatFindNonzeroRows_MPIAIJ,
3254        MatGetColumnNorms_MPIAIJ,
3255        MatInvertBlockDiagonal_MPIAIJ,
3256        0,
3257        MatGetSubMatricesParallel_MPIAIJ,
3258 /*129*/0,
3259        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3260        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3261        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3262        0,
3263 /*134*/0,
3264        0,
3265        0,
3266        0,
3267        0
3268 };
3269 
3270 /* ----------------------------------------------------------------------------------------*/
3271 
3272 EXTERN_C_BEGIN
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3275 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3276 {
3277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3282   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 EXTERN_C_END
3286 
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3290 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3291 {
3292   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3293   PetscErrorCode ierr;
3294 
3295   PetscFunctionBegin;
3296   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3297   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 EXTERN_C_END
3301 
3302 EXTERN_C_BEGIN
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3305 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3306 {
3307   Mat_MPIAIJ     *b;
3308   PetscErrorCode ierr;
3309   PetscInt       i;
3310   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3311 
3312   PetscFunctionBegin;
3313   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3314   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3315   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3316   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3317   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3318   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3319 
3320   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3321   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3322   if (d_nnz) {
3323     for (i=0; i<B->rmap->n; i++) {
3324       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3325     }
3326   }
3327   if (o_nnz) {
3328     for (i=0; i<B->rmap->n; i++) {
3329       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3330     }
3331   }
3332   b = (Mat_MPIAIJ*)B->data;
3333 
3334   if (!B->preallocated) {
3335     /* Explicitly create 2 MATSEQAIJ matrices. */
3336     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3337     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3338     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3339     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3340     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3341     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3342     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3343     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3344     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3345     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3346   }
3347 
3348   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3349   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3350   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3351   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3352   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3353   B->preallocated = PETSC_TRUE;
3354   PetscFunctionReturn(0);
3355 }
3356 EXTERN_C_END
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3361 {
3362   Mat            mat;
3363   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3364   PetscErrorCode ierr;
3365 
3366   PetscFunctionBegin;
3367   *newmat       = 0;
3368   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3369   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3370   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3371   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3372   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3373   a    = (Mat_MPIAIJ*)mat->data;
3374 
3375   mat->factortype    = matin->factortype;
3376   mat->rmap->bs      = matin->rmap->bs;
3377   mat->cmap->bs      = matin->cmap->bs;
3378   mat->assembled    = PETSC_TRUE;
3379   mat->insertmode   = NOT_SET_VALUES;
3380   mat->preallocated = PETSC_TRUE;
3381 
3382   a->size           = oldmat->size;
3383   a->rank           = oldmat->rank;
3384   a->donotstash     = oldmat->donotstash;
3385   a->roworiented    = oldmat->roworiented;
3386   a->rowindices     = 0;
3387   a->rowvalues      = 0;
3388   a->getrowactive   = PETSC_FALSE;
3389 
3390   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3391   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3392 
3393   if (oldmat->colmap) {
3394 #if defined (PETSC_USE_CTABLE)
3395     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3396 #else
3397     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3398     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3399     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3400 #endif
3401   } else a->colmap = 0;
3402   if (oldmat->garray) {
3403     PetscInt len;
3404     len  = oldmat->B->cmap->n;
3405     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3407     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3408   } else a->garray = 0;
3409 
3410   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3411   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3412   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3413   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3414   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3415   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3416   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3417   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3418   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3419   *newmat = mat;
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatLoad_MPIAIJ"
3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3428 {
3429   PetscScalar    *vals,*svals;
3430   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3431   PetscErrorCode ierr;
3432   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3433   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3434   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3435   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3436   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3437   int            fd;
3438   PetscInt       bs = 1;
3439 
3440   PetscFunctionBegin;
3441   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3442   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3443   if (!rank) {
3444     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3445     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3446     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3447   }
3448 
3449   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3450   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3451   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3452 
3453   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3454 
3455   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3456   M = header[1]; N = header[2];
3457   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3458   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3459   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3460 
3461   /* If global sizes are set, check if they are consistent with that given in the file */
3462   if (sizesset) {
3463     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3464   }
3465   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3466   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3467 
3468   /* determine ownership of all (block) rows */
3469   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3470   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3471   else m = newMat->rmap->n; /* Set by user */
3472 
3473   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3474   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3475 
3476   /* First process needs enough room for process with most rows */
3477   if (!rank) {
3478     mmax = rowners[1];
3479     for (i=2; i<=size; i++) {
3480       mmax = PetscMax(mmax, rowners[i]);
3481     }
3482   } else mmax = m;
3483 
3484   rowners[0] = 0;
3485   for (i=2; i<=size; i++) {
3486     rowners[i] += rowners[i-1];
3487   }
3488   rstart = rowners[rank];
3489   rend   = rowners[rank+1];
3490 
3491   /* distribute row lengths to all processors */
3492   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3493   if (!rank) {
3494     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3495     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3496     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3497     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3498     for (j=0; j<m; j++) {
3499       procsnz[0] += ourlens[j];
3500     }
3501     for (i=1; i<size; i++) {
3502       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3503       /* calculate the number of nonzeros on each processor */
3504       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3505         procsnz[i] += rowlengths[j];
3506       }
3507       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3510   } else {
3511     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3512   }
3513 
3514   if (!rank) {
3515     /* determine max buffer needed and allocate it */
3516     maxnz = 0;
3517     for (i=0; i<size; i++) {
3518       maxnz = PetscMax(maxnz,procsnz[i]);
3519     }
3520     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3521 
3522     /* read in my part of the matrix column indices  */
3523     nz   = procsnz[0];
3524     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3525     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3526 
3527     /* read in every one elses and ship off */
3528     for (i=1; i<size; i++) {
3529       nz     = procsnz[i];
3530       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3531       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3532     }
3533     ierr = PetscFree(cols);CHKERRQ(ierr);
3534   } else {
3535     /* determine buffer space needed for message */
3536     nz = 0;
3537     for (i=0; i<m; i++) {
3538       nz += ourlens[i];
3539     }
3540     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3541 
3542     /* receive message of column indices*/
3543     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3544   }
3545 
3546   /* determine column ownership if matrix is not square */
3547   if (N != M) {
3548     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3549     else n = newMat->cmap->n;
3550     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3551     cstart = cend - n;
3552   } else {
3553     cstart = rstart;
3554     cend   = rend;
3555     n      = cend - cstart;
3556   }
3557 
3558   /* loop over local rows, determining number of off diagonal entries */
3559   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3560   jj = 0;
3561   for (i=0; i<m; i++) {
3562     for (j=0; j<ourlens[i]; j++) {
3563       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3564       jj++;
3565     }
3566   }
3567 
3568   for (i=0; i<m; i++) {
3569     ourlens[i] -= offlens[i];
3570   }
3571   if (!sizesset) {
3572     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3573   }
3574 
3575   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3576 
3577   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3578 
3579   for (i=0; i<m; i++) {
3580     ourlens[i] += offlens[i];
3581   }
3582 
3583   if (!rank) {
3584     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3585 
3586     /* read in my part of the matrix numerical values  */
3587     nz   = procsnz[0];
3588     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3589 
3590     /* insert into matrix */
3591     jj      = rstart;
3592     smycols = mycols;
3593     svals   = vals;
3594     for (i=0; i<m; i++) {
3595       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3596       smycols += ourlens[i];
3597       svals   += ourlens[i];
3598       jj++;
3599     }
3600 
3601     /* read in other processors and ship out */
3602     for (i=1; i<size; i++) {
3603       nz     = procsnz[i];
3604       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3605       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3606     }
3607     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3608   } else {
3609     /* receive numeric values */
3610     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3611 
3612     /* receive message of values*/
3613     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3614 
3615     /* insert into matrix */
3616     jj      = rstart;
3617     smycols = mycols;
3618     svals   = vals;
3619     for (i=0; i<m; i++) {
3620       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3621       smycols += ourlens[i];
3622       svals   += ourlens[i];
3623       jj++;
3624     }
3625   }
3626   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3627   ierr = PetscFree(vals);CHKERRQ(ierr);
3628   ierr = PetscFree(mycols);CHKERRQ(ierr);
3629   ierr = PetscFree(rowners);CHKERRQ(ierr);
3630   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632 
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 #undef __FUNCT__
3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3639 {
3640   PetscErrorCode ierr;
3641   IS             iscol_local;
3642   PetscInt       csize;
3643 
3644   PetscFunctionBegin;
3645   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3646   if (call == MAT_REUSE_MATRIX) {
3647     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3648     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3649   } else {
3650     PetscInt cbs;
3651     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3652     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3653     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3654   }
3655   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3656   if (call == MAT_INITIAL_MATRIX) {
3657     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3658     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3659   }
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3666 /*
3667     Not great since it makes two copies of the submatrix, first an SeqAIJ
3668   in local and then by concatenating the local matrices the end result.
3669   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3670 
3671   Note: This requires a sequential iscol with all indices.
3672 */
3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3674 {
3675   PetscErrorCode ierr;
3676   PetscMPIInt    rank,size;
3677   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3678   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3679   PetscBool      allcolumns, colflag;
3680   Mat            M,Mreuse;
3681   MatScalar      *vwork,*aa;
3682   MPI_Comm       comm = ((PetscObject)mat)->comm;
3683   Mat_SeqAIJ     *aij;
3684 
3685 
3686   PetscFunctionBegin;
3687   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3688   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3689 
3690   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3691   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3692   if (colflag && ncol == mat->cmap->N){
3693     allcolumns = PETSC_TRUE;
3694   } else {
3695     allcolumns = PETSC_FALSE;
3696   }
3697   if (call ==  MAT_REUSE_MATRIX) {
3698     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3699     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3700     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3701   } else {
3702     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3703   }
3704 
3705   /*
3706       m - number of local rows
3707       n - number of columns (same on all processors)
3708       rstart - first row in new global matrix generated
3709   */
3710   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3711   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3712   if (call == MAT_INITIAL_MATRIX) {
3713     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3714     ii  = aij->i;
3715     jj  = aij->j;
3716 
3717     /*
3718         Determine the number of non-zeros in the diagonal and off-diagonal
3719         portions of the matrix in order to do correct preallocation
3720     */
3721 
3722     /* first get start and end of "diagonal" columns */
3723     if (csize == PETSC_DECIDE) {
3724       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3725       if (mglobal == n) { /* square matrix */
3726 	nlocal = m;
3727       } else {
3728         nlocal = n/size + ((n % size) > rank);
3729       }
3730     } else {
3731       nlocal = csize;
3732     }
3733     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3734     rstart = rend - nlocal;
3735     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3736 
3737     /* next, compute all the lengths */
3738     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3739     olens = dlens + m;
3740     for (i=0; i<m; i++) {
3741       jend = ii[i+1] - ii[i];
3742       olen = 0;
3743       dlen = 0;
3744       for (j=0; j<jend; j++) {
3745         if (*jj < rstart || *jj >= rend) olen++;
3746         else dlen++;
3747         jj++;
3748       }
3749       olens[i] = olen;
3750       dlens[i] = dlen;
3751     }
3752     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3753     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3754     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3755     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3757     ierr = PetscFree(dlens);CHKERRQ(ierr);
3758   } else {
3759     PetscInt ml,nl;
3760 
3761     M = *newmat;
3762     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3763     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3764     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3765     /*
3766          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3767        rather than the slower MatSetValues().
3768     */
3769     M->was_assembled = PETSC_TRUE;
3770     M->assembled     = PETSC_FALSE;
3771   }
3772   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3773   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3774   ii  = aij->i;
3775   jj  = aij->j;
3776   aa  = aij->a;
3777   for (i=0; i<m; i++) {
3778     row   = rstart + i;
3779     nz    = ii[i+1] - ii[i];
3780     cwork = jj;     jj += nz;
3781     vwork = aa;     aa += nz;
3782     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3783   }
3784 
3785   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3786   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   *newmat = M;
3788 
3789   /* save submatrix used in processor for next request */
3790   if (call ==  MAT_INITIAL_MATRIX) {
3791     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3792     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3793   }
3794 
3795   PetscFunctionReturn(0);
3796 }
3797 
3798 EXTERN_C_BEGIN
3799 #undef __FUNCT__
3800 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3801 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3802 {
3803   PetscInt       m,cstart, cend,j,nnz,i,d;
3804   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3805   const PetscInt *JJ;
3806   PetscScalar    *values;
3807   PetscErrorCode ierr;
3808 
3809   PetscFunctionBegin;
3810   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3811 
3812   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3813   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3814   m      = B->rmap->n;
3815   cstart = B->cmap->rstart;
3816   cend   = B->cmap->rend;
3817   rstart = B->rmap->rstart;
3818 
3819   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3820 
3821 #if defined(PETSC_USE_DEBUGGING)
3822   for (i=0; i<m; i++) {
3823     nnz     = Ii[i+1]- Ii[i];
3824     JJ      = J + Ii[i];
3825     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3826     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3827     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3828   }
3829 #endif
3830 
3831   for (i=0; i<m; i++) {
3832     nnz     = Ii[i+1]- Ii[i];
3833     JJ      = J + Ii[i];
3834     nnz_max = PetscMax(nnz_max,nnz);
3835     d       = 0;
3836     for (j=0; j<nnz; j++) {
3837       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3838     }
3839     d_nnz[i] = d;
3840     o_nnz[i] = nnz - d;
3841   }
3842   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3843   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3844 
3845   if (v) values = (PetscScalar*)v;
3846   else {
3847     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3848     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3849   }
3850 
3851   for (i=0; i<m; i++) {
3852     ii   = i + rstart;
3853     nnz  = Ii[i+1]- Ii[i];
3854     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3855   }
3856   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3857   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3858 
3859   if (!v) {
3860     ierr = PetscFree(values);CHKERRQ(ierr);
3861   }
3862   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 EXTERN_C_END
3866 
3867 #undef __FUNCT__
3868 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3869 /*@
3870    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3871    (the default parallel PETSc format).
3872 
3873    Collective on MPI_Comm
3874 
3875    Input Parameters:
3876 +  B - the matrix
3877 .  i - the indices into j for the start of each local row (starts with zero)
3878 .  j - the column indices for each local row (starts with zero)
3879 -  v - optional values in the matrix
3880 
3881    Level: developer
3882 
3883    Notes:
3884        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3885      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3886      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3887 
3888        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3889 
3890        The format which is used for the sparse matrix input, is equivalent to a
3891     row-major ordering.. i.e for the following matrix, the input data expected is
3892     as shown:
3893 
3894         1 0 0
3895         2 0 3     P0
3896        -------
3897         4 5 6     P1
3898 
3899      Process0 [P0]: rows_owned=[0,1]
3900         i =  {0,1,3}  [size = nrow+1  = 2+1]
3901         j =  {0,0,2}  [size = nz = 6]
3902         v =  {1,2,3}  [size = nz = 6]
3903 
3904      Process1 [P1]: rows_owned=[2]
3905         i =  {0,3}    [size = nrow+1  = 1+1]
3906         j =  {0,1,2}  [size = nz = 6]
3907         v =  {4,5,6}  [size = nz = 6]
3908 
3909 .keywords: matrix, aij, compressed row, sparse, parallel
3910 
3911 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3912           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3913 @*/
3914 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3915 {
3916   PetscErrorCode ierr;
3917 
3918   PetscFunctionBegin;
3919   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3925 /*@C
3926    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3927    (the default parallel PETSc format).  For good matrix assembly performance
3928    the user should preallocate the matrix storage by setting the parameters
3929    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3930    performance can be increased by more than a factor of 50.
3931 
3932    Collective on MPI_Comm
3933 
3934    Input Parameters:
3935 +  A - the matrix
3936 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3937            (same value is used for all local rows)
3938 .  d_nnz - array containing the number of nonzeros in the various rows of the
3939            DIAGONAL portion of the local submatrix (possibly different for each row)
3940            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3941            The size of this array is equal to the number of local rows, i.e 'm'.
3942            For matrices that will be factored, you must leave room for (and set)
3943            the diagonal entry even if it is zero.
3944 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3945            submatrix (same value is used for all local rows).
3946 -  o_nnz - array containing the number of nonzeros in the various rows of the
3947            OFF-DIAGONAL portion of the local submatrix (possibly different for
3948            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3949            structure. The size of this array is equal to the number
3950            of local rows, i.e 'm'.
3951 
3952    If the *_nnz parameter is given then the *_nz parameter is ignored
3953 
3954    The AIJ format (also called the Yale sparse matrix format or
3955    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3956    storage.  The stored row and column indices begin with zero.
3957    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3958 
3959    The parallel matrix is partitioned such that the first m0 rows belong to
3960    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3961    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3962 
3963    The DIAGONAL portion of the local submatrix of a processor can be defined
3964    as the submatrix which is obtained by extraction the part corresponding to
3965    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3966    first row that belongs to the processor, r2 is the last row belonging to
3967    the this processor, and c1-c2 is range of indices of the local part of a
3968    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3969    common case of a square matrix, the row and column ranges are the same and
3970    the DIAGONAL part is also square. The remaining portion of the local
3971    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3972 
3973    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3974 
3975    You can call MatGetInfo() to get information on how effective the preallocation was;
3976    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3977    You can also run with the option -info and look for messages with the string
3978    malloc in them to see if additional memory allocation was needed.
3979 
3980    Example usage:
3981 
3982    Consider the following 8x8 matrix with 34 non-zero values, that is
3983    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3984    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3985    as follows:
3986 
3987 .vb
3988             1  2  0  |  0  3  0  |  0  4
3989     Proc0   0  5  6  |  7  0  0  |  8  0
3990             9  0 10  | 11  0  0  | 12  0
3991     -------------------------------------
3992            13  0 14  | 15 16 17  |  0  0
3993     Proc1   0 18  0  | 19 20 21  |  0  0
3994             0  0  0  | 22 23  0  | 24  0
3995     -------------------------------------
3996     Proc2  25 26 27  |  0  0 28  | 29  0
3997            30  0  0  | 31 32 33  |  0 34
3998 .ve
3999 
4000    This can be represented as a collection of submatrices as:
4001 
4002 .vb
4003       A B C
4004       D E F
4005       G H I
4006 .ve
4007 
4008    Where the submatrices A,B,C are owned by proc0, D,E,F are
4009    owned by proc1, G,H,I are owned by proc2.
4010 
4011    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4012    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4013    The 'M','N' parameters are 8,8, and have the same values on all procs.
4014 
4015    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4016    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4017    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4018    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4019    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4020    matrix, ans [DF] as another SeqAIJ matrix.
4021 
4022    When d_nz, o_nz parameters are specified, d_nz storage elements are
4023    allocated for every row of the local diagonal submatrix, and o_nz
4024    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4025    One way to choose d_nz and o_nz is to use the max nonzerors per local
4026    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4027    In this case, the values of d_nz,o_nz are:
4028 .vb
4029      proc0 : dnz = 2, o_nz = 2
4030      proc1 : dnz = 3, o_nz = 2
4031      proc2 : dnz = 1, o_nz = 4
4032 .ve
4033    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4034    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4035    for proc3. i.e we are using 12+15+10=37 storage locations to store
4036    34 values.
4037 
4038    When d_nnz, o_nnz parameters are specified, the storage is specified
4039    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4040    In the above case the values for d_nnz,o_nnz are:
4041 .vb
4042      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4043      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4044      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4045 .ve
4046    Here the space allocated is sum of all the above values i.e 34, and
4047    hence pre-allocation is perfect.
4048 
4049    Level: intermediate
4050 
4051 .keywords: matrix, aij, compressed row, sparse, parallel
4052 
4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4054           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4055 @*/
4056 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4057 {
4058   PetscErrorCode ierr;
4059 
4060   PetscFunctionBegin;
4061   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4062   PetscValidType(B,1);
4063   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4064   PetscFunctionReturn(0);
4065 }
4066 
4067 #undef __FUNCT__
4068 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4069 /*@
4070      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4071          CSR format the local rows.
4072 
4073    Collective on MPI_Comm
4074 
4075    Input Parameters:
4076 +  comm - MPI communicator
4077 .  m - number of local rows (Cannot be PETSC_DECIDE)
4078 .  n - This value should be the same as the local size used in creating the
4079        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4080        calculated if N is given) For square matrices n is almost always m.
4081 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4082 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4083 .   i - row indices
4084 .   j - column indices
4085 -   a - matrix values
4086 
4087    Output Parameter:
4088 .   mat - the matrix
4089 
4090    Level: intermediate
4091 
4092    Notes:
4093        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4094      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4095      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4096 
4097        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4098 
4099        The format which is used for the sparse matrix input, is equivalent to a
4100     row-major ordering.. i.e for the following matrix, the input data expected is
4101     as shown:
4102 
4103         1 0 0
4104         2 0 3     P0
4105        -------
4106         4 5 6     P1
4107 
4108      Process0 [P0]: rows_owned=[0,1]
4109         i =  {0,1,3}  [size = nrow+1  = 2+1]
4110         j =  {0,0,2}  [size = nz = 6]
4111         v =  {1,2,3}  [size = nz = 6]
4112 
4113      Process1 [P1]: rows_owned=[2]
4114         i =  {0,3}    [size = nrow+1  = 1+1]
4115         j =  {0,1,2}  [size = nz = 6]
4116         v =  {4,5,6}  [size = nz = 6]
4117 
4118 .keywords: matrix, aij, compressed row, sparse, parallel
4119 
4120 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4121           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4122 @*/
4123 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4124 {
4125   PetscErrorCode ierr;
4126 
4127   PetscFunctionBegin;
4128   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4129   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4131   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4132   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4133   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4134   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4135   PetscFunctionReturn(0);
4136 }
4137 
4138 #undef __FUNCT__
4139 #define __FUNCT__ "MatCreateAIJ"
4140 /*@C
4141    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4142    (the default parallel PETSc format).  For good matrix assembly performance
4143    the user should preallocate the matrix storage by setting the parameters
4144    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4145    performance can be increased by more than a factor of 50.
4146 
4147    Collective on MPI_Comm
4148 
4149    Input Parameters:
4150 +  comm - MPI communicator
4151 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4152            This value should be the same as the local size used in creating the
4153            y vector for the matrix-vector product y = Ax.
4154 .  n - This value should be the same as the local size used in creating the
4155        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4156        calculated if N is given) For square matrices n is almost always m.
4157 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4158 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4159 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4160            (same value is used for all local rows)
4161 .  d_nnz - array containing the number of nonzeros in the various rows of the
4162            DIAGONAL portion of the local submatrix (possibly different for each row)
4163            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4164            The size of this array is equal to the number of local rows, i.e 'm'.
4165 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4166            submatrix (same value is used for all local rows).
4167 -  o_nnz - array containing the number of nonzeros in the various rows of the
4168            OFF-DIAGONAL portion of the local submatrix (possibly different for
4169            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4170            structure. The size of this array is equal to the number
4171            of local rows, i.e 'm'.
4172 
4173    Output Parameter:
4174 .  A - the matrix
4175 
4176    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4177    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4178    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4179 
4180    Notes:
4181    If the *_nnz parameter is given then the *_nz parameter is ignored
4182 
4183    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4184    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4185    storage requirements for this matrix.
4186 
4187    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4188    processor than it must be used on all processors that share the object for
4189    that argument.
4190 
4191    The user MUST specify either the local or global matrix dimensions
4192    (possibly both).
4193 
4194    The parallel matrix is partitioned across processors such that the
4195    first m0 rows belong to process 0, the next m1 rows belong to
4196    process 1, the next m2 rows belong to process 2 etc.. where
4197    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4198    values corresponding to [m x N] submatrix.
4199 
4200    The columns are logically partitioned with the n0 columns belonging
4201    to 0th partition, the next n1 columns belonging to the next
4202    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4203 
4204    The DIAGONAL portion of the local submatrix on any given processor
4205    is the submatrix corresponding to the rows and columns m,n
4206    corresponding to the given processor. i.e diagonal matrix on
4207    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4208    etc. The remaining portion of the local submatrix [m x (N-n)]
4209    constitute the OFF-DIAGONAL portion. The example below better
4210    illustrates this concept.
4211 
4212    For a square global matrix we define each processor's diagonal portion
4213    to be its local rows and the corresponding columns (a square submatrix);
4214    each processor's off-diagonal portion encompasses the remainder of the
4215    local matrix (a rectangular submatrix).
4216 
4217    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4218 
4219    When calling this routine with a single process communicator, a matrix of
4220    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4221    type of communicator, use the construction mechanism:
4222      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4223 
4224    By default, this format uses inodes (identical nodes) when possible.
4225    We search for consecutive rows with the same nonzero structure, thereby
4226    reusing matrix information to achieve increased efficiency.
4227 
4228    Options Database Keys:
4229 +  -mat_no_inode  - Do not use inodes
4230 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4231 -  -mat_aij_oneindex - Internally use indexing starting at 1
4232         rather than 0.  Note that when calling MatSetValues(),
4233         the user still MUST index entries starting at 0!
4234 
4235 
4236    Example usage:
4237 
4238    Consider the following 8x8 matrix with 34 non-zero values, that is
4239    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4240    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4241    as follows:
4242 
4243 .vb
4244             1  2  0  |  0  3  0  |  0  4
4245     Proc0   0  5  6  |  7  0  0  |  8  0
4246             9  0 10  | 11  0  0  | 12  0
4247     -------------------------------------
4248            13  0 14  | 15 16 17  |  0  0
4249     Proc1   0 18  0  | 19 20 21  |  0  0
4250             0  0  0  | 22 23  0  | 24  0
4251     -------------------------------------
4252     Proc2  25 26 27  |  0  0 28  | 29  0
4253            30  0  0  | 31 32 33  |  0 34
4254 .ve
4255 
4256    This can be represented as a collection of submatrices as:
4257 
4258 .vb
4259       A B C
4260       D E F
4261       G H I
4262 .ve
4263 
4264    Where the submatrices A,B,C are owned by proc0, D,E,F are
4265    owned by proc1, G,H,I are owned by proc2.
4266 
4267    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4268    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4269    The 'M','N' parameters are 8,8, and have the same values on all procs.
4270 
4271    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4272    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4273    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4274    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4275    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4276    matrix, ans [DF] as another SeqAIJ matrix.
4277 
4278    When d_nz, o_nz parameters are specified, d_nz storage elements are
4279    allocated for every row of the local diagonal submatrix, and o_nz
4280    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4281    One way to choose d_nz and o_nz is to use the max nonzerors per local
4282    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4283    In this case, the values of d_nz,o_nz are:
4284 .vb
4285      proc0 : dnz = 2, o_nz = 2
4286      proc1 : dnz = 3, o_nz = 2
4287      proc2 : dnz = 1, o_nz = 4
4288 .ve
4289    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4290    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4291    for proc3. i.e we are using 12+15+10=37 storage locations to store
4292    34 values.
4293 
4294    When d_nnz, o_nnz parameters are specified, the storage is specified
4295    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4296    In the above case the values for d_nnz,o_nnz are:
4297 .vb
4298      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4299      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4300      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4301 .ve
4302    Here the space allocated is sum of all the above values i.e 34, and
4303    hence pre-allocation is perfect.
4304 
4305    Level: intermediate
4306 
4307 .keywords: matrix, aij, compressed row, sparse, parallel
4308 
4309 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4310           MPIAIJ, MatCreateMPIAIJWithArrays()
4311 @*/
4312 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4313 {
4314   PetscErrorCode ierr;
4315   PetscMPIInt    size;
4316 
4317   PetscFunctionBegin;
4318   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4319   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4321   if (size > 1) {
4322     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4323     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4324   } else {
4325     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4326     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4327   }
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4333 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4334 {
4335   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4336 
4337   PetscFunctionBegin;
4338   *Ad     = a->A;
4339   *Ao     = a->B;
4340   *colmap = a->garray;
4341   PetscFunctionReturn(0);
4342 }
4343 
4344 #undef __FUNCT__
4345 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4346 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4347 {
4348   PetscErrorCode ierr;
4349   PetscInt       i;
4350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4351 
4352   PetscFunctionBegin;
4353   if (coloring->ctype == IS_COLORING_GLOBAL) {
4354     ISColoringValue *allcolors,*colors;
4355     ISColoring      ocoloring;
4356 
4357     /* set coloring for diagonal portion */
4358     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4359 
4360     /* set coloring for off-diagonal portion */
4361     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4362     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4363     for (i=0; i<a->B->cmap->n; i++) {
4364       colors[i] = allcolors[a->garray[i]];
4365     }
4366     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4367     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4368     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4369     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4370   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4371     ISColoringValue *colors;
4372     PetscInt        *larray;
4373     ISColoring      ocoloring;
4374 
4375     /* set coloring for diagonal portion */
4376     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4377     for (i=0; i<a->A->cmap->n; i++) {
4378       larray[i] = i + A->cmap->rstart;
4379     }
4380     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4381     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4382     for (i=0; i<a->A->cmap->n; i++) {
4383       colors[i] = coloring->colors[larray[i]];
4384     }
4385     ierr = PetscFree(larray);CHKERRQ(ierr);
4386     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4387     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4388     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4389 
4390     /* set coloring for off-diagonal portion */
4391     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4392     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4393     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4394     for (i=0; i<a->B->cmap->n; i++) {
4395       colors[i] = coloring->colors[larray[i]];
4396     }
4397     ierr = PetscFree(larray);CHKERRQ(ierr);
4398     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4399     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4400     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4401   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4402 
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #if defined(PETSC_HAVE_ADIC)
4407 #undef __FUNCT__
4408 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4409 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4410 {
4411   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4412   PetscErrorCode ierr;
4413 
4414   PetscFunctionBegin;
4415   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4416   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 #endif
4420 
4421 #undef __FUNCT__
4422 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4423 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4424 {
4425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4426   PetscErrorCode ierr;
4427 
4428   PetscFunctionBegin;
4429   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4430   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4431   PetscFunctionReturn(0);
4432 }
4433 
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4436 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4437 {
4438   PetscErrorCode ierr;
4439   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4440   PetscInt       *indx;
4441 
4442   PetscFunctionBegin;
4443   /* This routine will ONLY return MPIAIJ type matrix */
4444   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4445   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4446   if (n == PETSC_DECIDE){
4447     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4448   }
4449   /* Check sum(n) = N */
4450   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4451   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4452 
4453   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4454   rstart -= m;
4455 
4456   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4457   for (i=0;i<m;i++) {
4458     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4459     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4460     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4461   }
4462 
4463   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4464   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4465   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4466   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4467   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4468   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4474 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4475 {
4476   PetscErrorCode ierr;
4477   PetscInt       m,N,i,rstart,nnz,Ii;
4478   PetscInt       *indx;
4479   PetscScalar    *values;
4480 
4481   PetscFunctionBegin;
4482   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4483   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4484   for (i=0;i<m;i++) {
4485     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4486     Ii    = i + rstart;
4487     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4488     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4489   }
4490   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4491   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #undef __FUNCT__
4496 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4497 /*@
4498       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4499                  matrices from each processor
4500 
4501     Collective on MPI_Comm
4502 
4503    Input Parameters:
4504 +    comm - the communicators the parallel matrix will live on
4505 .    inmat - the input sequential matrices
4506 .    n - number of local columns (or PETSC_DECIDE)
4507 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4508 
4509    Output Parameter:
4510 .    outmat - the parallel matrix generated
4511 
4512     Level: advanced
4513 
4514    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4515 
4516 @*/
4517 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4518 {
4519   PetscErrorCode ierr;
4520 
4521   PetscFunctionBegin;
4522   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4523   if (scall == MAT_INITIAL_MATRIX){
4524     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4525   }
4526   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4527   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatFileSplit"
4533 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4534 {
4535   PetscErrorCode    ierr;
4536   PetscMPIInt       rank;
4537   PetscInt          m,N,i,rstart,nnz;
4538   size_t            len;
4539   const PetscInt    *indx;
4540   PetscViewer       out;
4541   char              *name;
4542   Mat               B;
4543   const PetscScalar *values;
4544 
4545   PetscFunctionBegin;
4546   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4547   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4548   /* Should this be the type of the diagonal block of A? */
4549   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4550   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4551   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4552   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4553   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4554   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4555   for (i=0;i<m;i++) {
4556     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4557     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4558     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4559   }
4560   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4561   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4562 
4563   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4564   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4565   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4566   sprintf(name,"%s.%d",outfile,rank);
4567   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4568   ierr = PetscFree(name);
4569   ierr = MatView(B,out);CHKERRQ(ierr);
4570   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4571   ierr = MatDestroy(&B);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4576 #undef __FUNCT__
4577 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4578 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4579 {
4580   PetscErrorCode       ierr;
4581   Mat_Merge_SeqsToMPI  *merge;
4582   PetscContainer       container;
4583 
4584   PetscFunctionBegin;
4585   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4586   if (container) {
4587     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4588     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4589     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4590     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4591     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4592     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4593     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4594     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4595     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4596     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4597     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4598     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4599     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4600     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4601     ierr = PetscFree(merge);CHKERRQ(ierr);
4602     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4603   }
4604   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4605   PetscFunctionReturn(0);
4606 }
4607 
4608 #include <../src/mat/utils/freespace.h>
4609 #include <petscbt.h>
4610 
4611 #undef __FUNCT__
4612 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4613 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4614 {
4615   PetscErrorCode       ierr;
4616   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4617   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4618   PetscMPIInt          size,rank,taga,*len_s;
4619   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4620   PetscInt             proc,m;
4621   PetscInt             **buf_ri,**buf_rj;
4622   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4623   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4624   MPI_Request          *s_waits,*r_waits;
4625   MPI_Status           *status;
4626   MatScalar            *aa=a->a;
4627   MatScalar            **abuf_r,*ba_i;
4628   Mat_Merge_SeqsToMPI  *merge;
4629   PetscContainer       container;
4630 
4631   PetscFunctionBegin;
4632   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4633 
4634   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4635   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4636 
4637   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4638   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4639 
4640   bi     = merge->bi;
4641   bj     = merge->bj;
4642   buf_ri = merge->buf_ri;
4643   buf_rj = merge->buf_rj;
4644 
4645   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4646   owners = merge->rowmap->range;
4647   len_s  = merge->len_s;
4648 
4649   /* send and recv matrix values */
4650   /*-----------------------------*/
4651   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4652   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4653 
4654   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4655   for (proc=0,k=0; proc<size; proc++){
4656     if (!len_s[proc]) continue;
4657     i = owners[proc];
4658     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4659     k++;
4660   }
4661 
4662   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4663   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4664   ierr = PetscFree(status);CHKERRQ(ierr);
4665 
4666   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4667   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4668 
4669   /* insert mat values of mpimat */
4670   /*----------------------------*/
4671   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4672   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4673 
4674   for (k=0; k<merge->nrecv; k++){
4675     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4676     nrows = *(buf_ri_k[k]);
4677     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4678     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4679   }
4680 
4681   /* set values of ba */
4682   m = merge->rowmap->n;
4683   for (i=0; i<m; i++) {
4684     arow = owners[rank] + i;
4685     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4686     bnzi = bi[i+1] - bi[i];
4687     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4688 
4689     /* add local non-zero vals of this proc's seqmat into ba */
4690     anzi = ai[arow+1] - ai[arow];
4691     aj   = a->j + ai[arow];
4692     aa   = a->a + ai[arow];
4693     nextaj = 0;
4694     for (j=0; nextaj<anzi; j++){
4695       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4696         ba_i[j] += aa[nextaj++];
4697       }
4698     }
4699 
4700     /* add received vals into ba */
4701     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4702       /* i-th row */
4703       if (i == *nextrow[k]) {
4704         anzi = *(nextai[k]+1) - *nextai[k];
4705         aj   = buf_rj[k] + *(nextai[k]);
4706         aa   = abuf_r[k] + *(nextai[k]);
4707         nextaj = 0;
4708         for (j=0; nextaj<anzi; j++){
4709           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4710             ba_i[j] += aa[nextaj++];
4711           }
4712         }
4713         nextrow[k]++; nextai[k]++;
4714       }
4715     }
4716     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4717   }
4718   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4719   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4720 
4721   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4722   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4723   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4724   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4725   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4726   PetscFunctionReturn(0);
4727 }
4728 
4729 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4730 
4731 #undef __FUNCT__
4732 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4733 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4734 {
4735   PetscErrorCode       ierr;
4736   Mat                  B_mpi;
4737   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4738   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4739   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4740   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4741   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4742   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4743   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4744   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4745   MPI_Status           *status;
4746   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4747   PetscBT              lnkbt;
4748   Mat_Merge_SeqsToMPI  *merge;
4749   PetscContainer       container;
4750 
4751   PetscFunctionBegin;
4752   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4753 
4754   /* make sure it is a PETSc comm */
4755   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4756   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4757   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4758 
4759   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4760   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4761 
4762   /* determine row ownership */
4763   /*---------------------------------------------------------*/
4764   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4765   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4766   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4767   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4768   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4769   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4770   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4771 
4772   m      = merge->rowmap->n;
4773   M      = merge->rowmap->N;
4774   owners = merge->rowmap->range;
4775 
4776   /* determine the number of messages to send, their lengths */
4777   /*---------------------------------------------------------*/
4778   len_s  = merge->len_s;
4779 
4780   len = 0;  /* length of buf_si[] */
4781   merge->nsend = 0;
4782   for (proc=0; proc<size; proc++){
4783     len_si[proc] = 0;
4784     if (proc == rank){
4785       len_s[proc] = 0;
4786     } else {
4787       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4788       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4789     }
4790     if (len_s[proc]) {
4791       merge->nsend++;
4792       nrows = 0;
4793       for (i=owners[proc]; i<owners[proc+1]; i++){
4794         if (ai[i+1] > ai[i]) nrows++;
4795       }
4796       len_si[proc] = 2*(nrows+1);
4797       len += len_si[proc];
4798     }
4799   }
4800 
4801   /* determine the number and length of messages to receive for ij-structure */
4802   /*-------------------------------------------------------------------------*/
4803   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4804   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4805 
4806   /* post the Irecv of j-structure */
4807   /*-------------------------------*/
4808   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4809   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4810 
4811   /* post the Isend of j-structure */
4812   /*--------------------------------*/
4813   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4814 
4815   for (proc=0, k=0; proc<size; proc++){
4816     if (!len_s[proc]) continue;
4817     i = owners[proc];
4818     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4819     k++;
4820   }
4821 
4822   /* receives and sends of j-structure are complete */
4823   /*------------------------------------------------*/
4824   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4825   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4826 
4827   /* send and recv i-structure */
4828   /*---------------------------*/
4829   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4830   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4831 
4832   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4833   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4834   for (proc=0,k=0; proc<size; proc++){
4835     if (!len_s[proc]) continue;
4836     /* form outgoing message for i-structure:
4837          buf_si[0]:                 nrows to be sent
4838                [1:nrows]:           row index (global)
4839                [nrows+1:2*nrows+1]: i-structure index
4840     */
4841     /*-------------------------------------------*/
4842     nrows = len_si[proc]/2 - 1;
4843     buf_si_i    = buf_si + nrows+1;
4844     buf_si[0]   = nrows;
4845     buf_si_i[0] = 0;
4846     nrows = 0;
4847     for (i=owners[proc]; i<owners[proc+1]; i++){
4848       anzi = ai[i+1] - ai[i];
4849       if (anzi) {
4850         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4851         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4852         nrows++;
4853       }
4854     }
4855     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4856     k++;
4857     buf_si += len_si[proc];
4858   }
4859 
4860   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4861   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4862 
4863   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4864   for (i=0; i<merge->nrecv; i++){
4865     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4866   }
4867 
4868   ierr = PetscFree(len_si);CHKERRQ(ierr);
4869   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4870   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4871   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4872   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4873   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4874   ierr = PetscFree(status);CHKERRQ(ierr);
4875 
4876   /* compute a local seq matrix in each processor */
4877   /*----------------------------------------------*/
4878   /* allocate bi array and free space for accumulating nonzero column info */
4879   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4880   bi[0] = 0;
4881 
4882   /* create and initialize a linked list */
4883   nlnk = N+1;
4884   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4885 
4886   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4887   len = 0;
4888   len  = ai[owners[rank+1]] - ai[owners[rank]];
4889   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4890   current_space = free_space;
4891 
4892   /* determine symbolic info for each local row */
4893   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4894 
4895   for (k=0; k<merge->nrecv; k++){
4896     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4897     nrows = *buf_ri_k[k];
4898     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4899     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4900   }
4901 
4902   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4903   len = 0;
4904   for (i=0;i<m;i++) {
4905     bnzi   = 0;
4906     /* add local non-zero cols of this proc's seqmat into lnk */
4907     arow   = owners[rank] + i;
4908     anzi   = ai[arow+1] - ai[arow];
4909     aj     = a->j + ai[arow];
4910     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4911     bnzi += nlnk;
4912     /* add received col data into lnk */
4913     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4914       if (i == *nextrow[k]) { /* i-th row */
4915         anzi = *(nextai[k]+1) - *nextai[k];
4916         aj   = buf_rj[k] + *nextai[k];
4917         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4918         bnzi += nlnk;
4919         nextrow[k]++; nextai[k]++;
4920       }
4921     }
4922     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4923 
4924     /* if free space is not available, make more free space */
4925     if (current_space->local_remaining<bnzi) {
4926       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4927       nspacedouble++;
4928     }
4929     /* copy data into free space, then initialize lnk */
4930     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4931     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4932 
4933     current_space->array           += bnzi;
4934     current_space->local_used      += bnzi;
4935     current_space->local_remaining -= bnzi;
4936 
4937     bi[i+1] = bi[i] + bnzi;
4938   }
4939 
4940   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4941 
4942   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4943   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4944   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4945 
4946   /* create symbolic parallel matrix B_mpi */
4947   /*---------------------------------------*/
4948     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4949   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4950   if (n==PETSC_DECIDE) {
4951     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4952   } else {
4953     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4954   }
4955   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4956   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4957   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4958   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4959   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4960 
4961   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4962   B_mpi->assembled     = PETSC_FALSE;
4963   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4964   merge->bi            = bi;
4965   merge->bj            = bj;
4966   merge->buf_ri        = buf_ri;
4967   merge->buf_rj        = buf_rj;
4968   merge->coi           = PETSC_NULL;
4969   merge->coj           = PETSC_NULL;
4970   merge->owners_co     = PETSC_NULL;
4971 
4972   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4973 
4974   /* attach the supporting struct to B_mpi for reuse */
4975   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4976   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4977   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4978   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4979   *mpimat = B_mpi;
4980 
4981   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4982   PetscFunctionReturn(0);
4983 }
4984 
4985 #undef __FUNCT__
4986 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4987 /*@C
4988       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4989                  matrices from each processor
4990 
4991     Collective on MPI_Comm
4992 
4993    Input Parameters:
4994 +    comm - the communicators the parallel matrix will live on
4995 .    seqmat - the input sequential matrices
4996 .    m - number of local rows (or PETSC_DECIDE)
4997 .    n - number of local columns (or PETSC_DECIDE)
4998 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4999 
5000    Output Parameter:
5001 .    mpimat - the parallel matrix generated
5002 
5003     Level: advanced
5004 
5005    Notes:
5006      The dimensions of the sequential matrix in each processor MUST be the same.
5007      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5008      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5009 @*/
5010 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5011 {
5012   PetscErrorCode   ierr;
5013   PetscMPIInt     size;
5014 
5015   PetscFunctionBegin;
5016   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5017   if (size == 1){
5018      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5019      if (scall == MAT_INITIAL_MATRIX){
5020        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5021      } else {
5022        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5023      }
5024      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5025      PetscFunctionReturn(0);
5026   }
5027   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5028   if (scall == MAT_INITIAL_MATRIX){
5029     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5030   }
5031   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5032   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5038 /*@
5039      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5040           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5041           with MatGetSize()
5042 
5043     Not Collective
5044 
5045    Input Parameters:
5046 +    A - the matrix
5047 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5048 
5049    Output Parameter:
5050 .    A_loc - the local sequential matrix generated
5051 
5052     Level: developer
5053 
5054 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5055 
5056 @*/
5057 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5058 {
5059   PetscErrorCode  ierr;
5060   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5061   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5062   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5063   MatScalar       *aa=a->a,*ba=b->a,*cam;
5064   PetscScalar     *ca;
5065   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5066   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5067   PetscBool       match;
5068 
5069   PetscFunctionBegin;
5070   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5071   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5072   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5073   if (scall == MAT_INITIAL_MATRIX){
5074     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5075     ci[0] = 0;
5076     for (i=0; i<am; i++){
5077       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5078     }
5079     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5080     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5081     k = 0;
5082     for (i=0; i<am; i++) {
5083       ncols_o = bi[i+1] - bi[i];
5084       ncols_d = ai[i+1] - ai[i];
5085       /* off-diagonal portion of A */
5086       for (jo=0; jo<ncols_o; jo++) {
5087         col = cmap[*bj];
5088         if (col >= cstart) break;
5089         cj[k]   = col; bj++;
5090         ca[k++] = *ba++;
5091       }
5092       /* diagonal portion of A */
5093       for (j=0; j<ncols_d; j++) {
5094         cj[k]   = cstart + *aj++;
5095         ca[k++] = *aa++;
5096       }
5097       /* off-diagonal portion of A */
5098       for (j=jo; j<ncols_o; j++) {
5099         cj[k]   = cmap[*bj++];
5100         ca[k++] = *ba++;
5101       }
5102     }
5103     /* put together the new matrix */
5104     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5105     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5106     /* Since these are PETSc arrays, change flags to free them as necessary. */
5107     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5108     mat->free_a  = PETSC_TRUE;
5109     mat->free_ij = PETSC_TRUE;
5110     mat->nonew   = 0;
5111   } else if (scall == MAT_REUSE_MATRIX){
5112     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5113     ci = mat->i; cj = mat->j; cam = mat->a;
5114     for (i=0; i<am; i++) {
5115       /* off-diagonal portion of A */
5116       ncols_o = bi[i+1] - bi[i];
5117       for (jo=0; jo<ncols_o; jo++) {
5118         col = cmap[*bj];
5119         if (col >= cstart) break;
5120         *cam++ = *ba++; bj++;
5121       }
5122       /* diagonal portion of A */
5123       ncols_d = ai[i+1] - ai[i];
5124       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5125       /* off-diagonal portion of A */
5126       for (j=jo; j<ncols_o; j++) {
5127         *cam++ = *ba++; bj++;
5128       }
5129     }
5130   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5131   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5132   PetscFunctionReturn(0);
5133 }
5134 
5135 #undef __FUNCT__
5136 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5137 /*@C
5138      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5139 
5140     Not Collective
5141 
5142    Input Parameters:
5143 +    A - the matrix
5144 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5145 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5146 
5147    Output Parameter:
5148 .    A_loc - the local sequential matrix generated
5149 
5150     Level: developer
5151 
5152 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5153 
5154 @*/
5155 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5156 {
5157   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5158   PetscErrorCode    ierr;
5159   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5160   IS                isrowa,iscola;
5161   Mat               *aloc;
5162   PetscBool       match;
5163 
5164   PetscFunctionBegin;
5165   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5166   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5167   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5168   if (!row){
5169     start = A->rmap->rstart; end = A->rmap->rend;
5170     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5171   } else {
5172     isrowa = *row;
5173   }
5174   if (!col){
5175     start = A->cmap->rstart;
5176     cmap  = a->garray;
5177     nzA   = a->A->cmap->n;
5178     nzB   = a->B->cmap->n;
5179     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5180     ncols = 0;
5181     for (i=0; i<nzB; i++) {
5182       if (cmap[i] < start) idx[ncols++] = cmap[i];
5183       else break;
5184     }
5185     imark = i;
5186     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5187     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5188     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5189   } else {
5190     iscola = *col;
5191   }
5192   if (scall != MAT_INITIAL_MATRIX){
5193     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5194     aloc[0] = *A_loc;
5195   }
5196   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5197   *A_loc = aloc[0];
5198   ierr = PetscFree(aloc);CHKERRQ(ierr);
5199   if (!row){
5200     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5201   }
5202   if (!col){
5203     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5204   }
5205   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5206   PetscFunctionReturn(0);
5207 }
5208 
5209 #undef __FUNCT__
5210 #define __FUNCT__ "MatGetBrowsOfAcols"
5211 /*@C
5212     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5213 
5214     Collective on Mat
5215 
5216    Input Parameters:
5217 +    A,B - the matrices in mpiaij format
5218 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5219 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5220 
5221    Output Parameter:
5222 +    rowb, colb - index sets of rows and columns of B to extract
5223 -    B_seq - the sequential matrix generated
5224 
5225     Level: developer
5226 
5227 @*/
5228 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5229 {
5230   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5231   PetscErrorCode    ierr;
5232   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5233   IS                isrowb,iscolb;
5234   Mat               *bseq=PETSC_NULL;
5235 
5236   PetscFunctionBegin;
5237   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5238     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5239   }
5240   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5241 
5242   if (scall == MAT_INITIAL_MATRIX){
5243     start = A->cmap->rstart;
5244     cmap  = a->garray;
5245     nzA   = a->A->cmap->n;
5246     nzB   = a->B->cmap->n;
5247     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5248     ncols = 0;
5249     for (i=0; i<nzB; i++) {  /* row < local row index */
5250       if (cmap[i] < start) idx[ncols++] = cmap[i];
5251       else break;
5252     }
5253     imark = i;
5254     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5255     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5256     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5257     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5258   } else {
5259     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5260     isrowb = *rowb; iscolb = *colb;
5261     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5262     bseq[0] = *B_seq;
5263   }
5264   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5265   *B_seq = bseq[0];
5266   ierr = PetscFree(bseq);CHKERRQ(ierr);
5267   if (!rowb){
5268     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5269   } else {
5270     *rowb = isrowb;
5271   }
5272   if (!colb){
5273     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5274   } else {
5275     *colb = iscolb;
5276   }
5277   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5278   PetscFunctionReturn(0);
5279 }
5280 
5281 #undef __FUNCT__
5282 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5283 /*
5284     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5285     of the OFF-DIAGONAL portion of local A
5286 
5287     Collective on Mat
5288 
5289    Input Parameters:
5290 +    A,B - the matrices in mpiaij format
5291 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5292 
5293    Output Parameter:
5294 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5295 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5296 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5297 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5298 
5299     Level: developer
5300 
5301 */
5302 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5303 {
5304   VecScatter_MPI_General *gen_to,*gen_from;
5305   PetscErrorCode         ierr;
5306   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5307   Mat_SeqAIJ             *b_oth;
5308   VecScatter             ctx=a->Mvctx;
5309   MPI_Comm               comm=((PetscObject)ctx)->comm;
5310   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5311   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5312   PetscScalar            *rvalues,*svalues;
5313   MatScalar              *b_otha,*bufa,*bufA;
5314   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5315   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5316   MPI_Status             *sstatus,rstatus;
5317   PetscMPIInt            jj;
5318   PetscInt               *cols,sbs,rbs;
5319   PetscScalar            *vals;
5320 
5321   PetscFunctionBegin;
5322   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5323     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5324   }
5325   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5326   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5327 
5328   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5329   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5330   rvalues  = gen_from->values; /* holds the length of receiving row */
5331   svalues  = gen_to->values;   /* holds the length of sending row */
5332   nrecvs   = gen_from->n;
5333   nsends   = gen_to->n;
5334 
5335   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5336   srow     = gen_to->indices;   /* local row index to be sent */
5337   sstarts  = gen_to->starts;
5338   sprocs   = gen_to->procs;
5339   sstatus  = gen_to->sstatus;
5340   sbs      = gen_to->bs;
5341   rstarts  = gen_from->starts;
5342   rprocs   = gen_from->procs;
5343   rbs      = gen_from->bs;
5344 
5345   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5346   if (scall == MAT_INITIAL_MATRIX){
5347     /* i-array */
5348     /*---------*/
5349     /*  post receives */
5350     for (i=0; i<nrecvs; i++){
5351       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5352       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5353       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5354     }
5355 
5356     /* pack the outgoing message */
5357     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5358     sstartsj[0] = 0;  rstartsj[0] = 0;
5359     len = 0; /* total length of j or a array to be sent */
5360     k = 0;
5361     for (i=0; i<nsends; i++){
5362       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5363       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5364       for (j=0; j<nrows; j++) {
5365         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5366         for (l=0; l<sbs; l++){
5367           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5368           rowlen[j*sbs+l] = ncols;
5369           len += ncols;
5370           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5371         }
5372         k++;
5373       }
5374       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5375       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5376     }
5377     /* recvs and sends of i-array are completed */
5378     i = nrecvs;
5379     while (i--) {
5380       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5381     }
5382     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5383 
5384     /* allocate buffers for sending j and a arrays */
5385     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5386     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5387 
5388     /* create i-array of B_oth */
5389     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5390     b_othi[0] = 0;
5391     len = 0; /* total length of j or a array to be received */
5392     k = 0;
5393     for (i=0; i<nrecvs; i++){
5394       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5395       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5396       for (j=0; j<nrows; j++) {
5397         b_othi[k+1] = b_othi[k] + rowlen[j];
5398         len += rowlen[j]; k++;
5399       }
5400       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5401     }
5402 
5403     /* allocate space for j and a arrrays of B_oth */
5404     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5405     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5406 
5407     /* j-array */
5408     /*---------*/
5409     /*  post receives of j-array */
5410     for (i=0; i<nrecvs; i++){
5411       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5412       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5413     }
5414 
5415     /* pack the outgoing message j-array */
5416     k = 0;
5417     for (i=0; i<nsends; i++){
5418       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5419       bufJ = bufj+sstartsj[i];
5420       for (j=0; j<nrows; j++) {
5421         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5422         for (ll=0; ll<sbs; ll++){
5423           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5424           for (l=0; l<ncols; l++){
5425             *bufJ++ = cols[l];
5426           }
5427           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5428         }
5429       }
5430       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5431     }
5432 
5433     /* recvs and sends of j-array are completed */
5434     i = nrecvs;
5435     while (i--) {
5436       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5437     }
5438     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5439   } else if (scall == MAT_REUSE_MATRIX){
5440     sstartsj = *startsj_s;
5441     rstartsj = *startsj_r;
5442     bufa     = *bufa_ptr;
5443     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5444     b_otha   = b_oth->a;
5445   } else {
5446     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5447   }
5448 
5449   /* a-array */
5450   /*---------*/
5451   /*  post receives of a-array */
5452   for (i=0; i<nrecvs; i++){
5453     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5454     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5455   }
5456 
5457   /* pack the outgoing message a-array */
5458   k = 0;
5459   for (i=0; i<nsends; i++){
5460     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5461     bufA = bufa+sstartsj[i];
5462     for (j=0; j<nrows; j++) {
5463       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5464       for (ll=0; ll<sbs; ll++){
5465         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5466         for (l=0; l<ncols; l++){
5467           *bufA++ = vals[l];
5468         }
5469         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5470       }
5471     }
5472     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5473   }
5474   /* recvs and sends of a-array are completed */
5475   i = nrecvs;
5476   while (i--) {
5477     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5478   }
5479   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5480   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5481 
5482   if (scall == MAT_INITIAL_MATRIX){
5483     /* put together the new matrix */
5484     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5485 
5486     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5487     /* Since these are PETSc arrays, change flags to free them as necessary. */
5488     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5489     b_oth->free_a  = PETSC_TRUE;
5490     b_oth->free_ij = PETSC_TRUE;
5491     b_oth->nonew   = 0;
5492 
5493     ierr = PetscFree(bufj);CHKERRQ(ierr);
5494     if (!startsj_s || !bufa_ptr){
5495       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5496       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5497     } else {
5498       *startsj_s = sstartsj;
5499       *startsj_r = rstartsj;
5500       *bufa_ptr  = bufa;
5501     }
5502   }
5503   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5504   PetscFunctionReturn(0);
5505 }
5506 
5507 #undef __FUNCT__
5508 #define __FUNCT__ "MatGetCommunicationStructs"
5509 /*@C
5510   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5511 
5512   Not Collective
5513 
5514   Input Parameters:
5515 . A - The matrix in mpiaij format
5516 
5517   Output Parameter:
5518 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5519 . colmap - A map from global column index to local index into lvec
5520 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5521 
5522   Level: developer
5523 
5524 @*/
5525 #if defined (PETSC_USE_CTABLE)
5526 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5527 #else
5528 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5529 #endif
5530 {
5531   Mat_MPIAIJ *a;
5532 
5533   PetscFunctionBegin;
5534   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5535   PetscValidPointer(lvec, 2);
5536   PetscValidPointer(colmap, 3);
5537   PetscValidPointer(multScatter, 4);
5538   a = (Mat_MPIAIJ *) A->data;
5539   if (lvec) *lvec = a->lvec;
5540   if (colmap) *colmap = a->colmap;
5541   if (multScatter) *multScatter = a->Mvctx;
5542   PetscFunctionReturn(0);
5543 }
5544 
5545 EXTERN_C_BEGIN
5546 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5547 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5548 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5549 EXTERN_C_END
5550 
5551 #undef __FUNCT__
5552 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5553 /*
5554     Computes (B'*A')' since computing B*A directly is untenable
5555 
5556                n                       p                          p
5557         (              )       (              )         (                  )
5558       m (      A       )  *  n (       B      )   =   m (         C        )
5559         (              )       (              )         (                  )
5560 
5561 */
5562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5563 {
5564   PetscErrorCode     ierr;
5565   Mat                At,Bt,Ct;
5566 
5567   PetscFunctionBegin;
5568   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5569   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5570   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5571   ierr = MatDestroy(&At);CHKERRQ(ierr);
5572   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5573   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5574   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 #undef __FUNCT__
5579 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5580 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5581 {
5582   PetscErrorCode ierr;
5583   PetscInt       m=A->rmap->n,n=B->cmap->n;
5584   Mat            Cmat;
5585 
5586   PetscFunctionBegin;
5587   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5588   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5589   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5590   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5591   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5592   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5593   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5594   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5595 
5596   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5597   *C = Cmat;
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /* ----------------------------------------------------------------*/
5602 #undef __FUNCT__
5603 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5604 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5605 {
5606   PetscErrorCode ierr;
5607 
5608   PetscFunctionBegin;
5609   if (scall == MAT_INITIAL_MATRIX){
5610     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5611   }
5612   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5613   PetscFunctionReturn(0);
5614 }
5615 
5616 EXTERN_C_BEGIN
5617 #if defined(PETSC_HAVE_MUMPS)
5618 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5619 #endif
5620 #if defined(PETSC_HAVE_PASTIX)
5621 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5622 #endif
5623 #if defined(PETSC_HAVE_SUPERLU_DIST)
5624 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5625 #endif
5626 #if defined(PETSC_HAVE_CLIQUE)
5627 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5628 #endif
5629 EXTERN_C_END
5630 
5631 /*MC
5632    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5633 
5634    Options Database Keys:
5635 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5636 
5637   Level: beginner
5638 
5639 .seealso: MatCreateAIJ()
5640 M*/
5641 
5642 EXTERN_C_BEGIN
5643 #undef __FUNCT__
5644 #define __FUNCT__ "MatCreate_MPIAIJ"
5645 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5646 {
5647   Mat_MPIAIJ     *b;
5648   PetscErrorCode ierr;
5649   PetscMPIInt    size;
5650 
5651   PetscFunctionBegin;
5652   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5653   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5654   B->data         = (void*)b;
5655   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5656   B->assembled    = PETSC_FALSE;
5657   B->insertmode   = NOT_SET_VALUES;
5658   b->size         = size;
5659   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5660 
5661   /* build cache for off array entries formed */
5662   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5663   b->donotstash  = PETSC_FALSE;
5664   b->colmap      = 0;
5665   b->garray      = 0;
5666   b->roworiented = PETSC_TRUE;
5667 
5668   /* stuff used for matrix vector multiply */
5669   b->lvec      = PETSC_NULL;
5670   b->Mvctx     = PETSC_NULL;
5671 
5672   /* stuff for MatGetRow() */
5673   b->rowindices   = 0;
5674   b->rowvalues    = 0;
5675   b->getrowactive = PETSC_FALSE;
5676 
5677   /* flexible pointer used in CUSP/CUSPARSE classes */
5678   b->spptr        = PETSC_NULL;
5679 
5680 #if defined(PETSC_HAVE_MUMPS)
5681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5682                                      "MatGetFactor_aij_mumps",
5683                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5684 #endif
5685 #if defined(PETSC_HAVE_PASTIX)
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5687 					   "MatGetFactor_mpiaij_pastix",
5688 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5689 #endif
5690 #if defined(PETSC_HAVE_SUPERLU_DIST)
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5692                                      "MatGetFactor_mpiaij_superlu_dist",
5693                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5694 #endif
5695 #if defined(PETSC_HAVE_CLIQUE)
5696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5697                                      "MatGetFactor_aij_clique",
5698                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5699 #endif
5700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5701                                      "MatStoreValues_MPIAIJ",
5702                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5704                                      "MatRetrieveValues_MPIAIJ",
5705                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5707 				     "MatGetDiagonalBlock_MPIAIJ",
5708                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5710 				     "MatIsTranspose_MPIAIJ",
5711 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5713 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5714 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5715   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5716 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5717 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5718   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5719 				     "MatDiagonalScaleLocal_MPIAIJ",
5720 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5721   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5722                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5723                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5724   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5725                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5726                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5727   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5728                                      "MatConvert_MPIAIJ_MPISBAIJ",
5729                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5730   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5731                                      "MatMatMult_MPIDense_MPIAIJ",
5732                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5733   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5734                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5735                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5736   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5737                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5738                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5739   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5740   PetscFunctionReturn(0);
5741 }
5742 EXTERN_C_END
5743 
5744 #undef __FUNCT__
5745 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5746 /*@
5747      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5748          and "off-diagonal" part of the matrix in CSR format.
5749 
5750    Collective on MPI_Comm
5751 
5752    Input Parameters:
5753 +  comm - MPI communicator
5754 .  m - number of local rows (Cannot be PETSC_DECIDE)
5755 .  n - This value should be the same as the local size used in creating the
5756        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5757        calculated if N is given) For square matrices n is almost always m.
5758 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5759 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5760 .   i - row indices for "diagonal" portion of matrix
5761 .   j - column indices
5762 .   a - matrix values
5763 .   oi - row indices for "off-diagonal" portion of matrix
5764 .   oj - column indices
5765 -   oa - matrix values
5766 
5767    Output Parameter:
5768 .   mat - the matrix
5769 
5770    Level: advanced
5771 
5772    Notes:
5773        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5774        must free the arrays once the matrix has been destroyed and not before.
5775 
5776        The i and j indices are 0 based
5777 
5778        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5779 
5780        This sets local rows and cannot be used to set off-processor values.
5781 
5782        You cannot later use MatSetValues() to change values in this matrix.
5783 
5784 .keywords: matrix, aij, compressed row, sparse, parallel
5785 
5786 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5787           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5788 @*/
5789 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5790 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5791 {
5792   PetscErrorCode ierr;
5793   Mat_MPIAIJ     *maij;
5794 
5795  PetscFunctionBegin;
5796   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5797   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5798   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5799   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5800   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5801   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5802   maij = (Mat_MPIAIJ*) (*mat)->data;
5803   maij->donotstash     = PETSC_TRUE;
5804   (*mat)->preallocated = PETSC_TRUE;
5805 
5806   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5807   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5808 
5809   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5810   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5811 
5812   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5813   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5814   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5815   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5816 
5817   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5818   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5819   PetscFunctionReturn(0);
5820 }
5821 
5822 /*
5823     Special version for direct calls from Fortran
5824 */
5825 #include <petsc-private/fortranimpl.h>
5826 
5827 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5828 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5829 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5830 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5831 #endif
5832 
5833 /* Change these macros so can be used in void function */
5834 #undef CHKERRQ
5835 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5836 #undef SETERRQ2
5837 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5838 #undef SETERRQ3
5839 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5840 #undef SETERRQ
5841 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5842 
5843 EXTERN_C_BEGIN
5844 #undef __FUNCT__
5845 #define __FUNCT__ "matsetvaluesmpiaij_"
5846 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5847 {
5848   Mat             mat = *mmat;
5849   PetscInt        m = *mm, n = *mn;
5850   InsertMode      addv = *maddv;
5851   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5852   PetscScalar     value;
5853   PetscErrorCode  ierr;
5854 
5855   MatCheckPreallocated(mat,1);
5856   if (mat->insertmode == NOT_SET_VALUES) {
5857     mat->insertmode = addv;
5858   }
5859 #if defined(PETSC_USE_DEBUG)
5860   else if (mat->insertmode != addv) {
5861     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5862   }
5863 #endif
5864   {
5865   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5866   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5867   PetscBool       roworiented = aij->roworiented;
5868 
5869   /* Some Variables required in the macro */
5870   Mat             A = aij->A;
5871   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5872   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5873   MatScalar       *aa = a->a;
5874   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5875   Mat             B = aij->B;
5876   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5877   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5878   MatScalar       *ba = b->a;
5879 
5880   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5881   PetscInt        nonew = a->nonew;
5882   MatScalar       *ap1,*ap2;
5883 
5884   PetscFunctionBegin;
5885   for (i=0; i<m; i++) {
5886     if (im[i] < 0) continue;
5887 #if defined(PETSC_USE_DEBUG)
5888     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5889 #endif
5890     if (im[i] >= rstart && im[i] < rend) {
5891       row      = im[i] - rstart;
5892       lastcol1 = -1;
5893       rp1      = aj + ai[row];
5894       ap1      = aa + ai[row];
5895       rmax1    = aimax[row];
5896       nrow1    = ailen[row];
5897       low1     = 0;
5898       high1    = nrow1;
5899       lastcol2 = -1;
5900       rp2      = bj + bi[row];
5901       ap2      = ba + bi[row];
5902       rmax2    = bimax[row];
5903       nrow2    = bilen[row];
5904       low2     = 0;
5905       high2    = nrow2;
5906 
5907       for (j=0; j<n; j++) {
5908         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5909         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5910         if (in[j] >= cstart && in[j] < cend){
5911           col = in[j] - cstart;
5912           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5913         } else if (in[j] < 0) continue;
5914 #if defined(PETSC_USE_DEBUG)
5915         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5916 #endif
5917         else {
5918           if (mat->was_assembled) {
5919             if (!aij->colmap) {
5920               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5921             }
5922 #if defined (PETSC_USE_CTABLE)
5923             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5924 	    col--;
5925 #else
5926             col = aij->colmap[in[j]] - 1;
5927 #endif
5928             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5929               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5930               col =  in[j];
5931               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5932               B = aij->B;
5933               b = (Mat_SeqAIJ*)B->data;
5934               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5935               rp2      = bj + bi[row];
5936               ap2      = ba + bi[row];
5937               rmax2    = bimax[row];
5938               nrow2    = bilen[row];
5939               low2     = 0;
5940               high2    = nrow2;
5941               bm       = aij->B->rmap->n;
5942               ba = b->a;
5943             }
5944           } else col = in[j];
5945           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5946         }
5947       }
5948     } else {
5949       if (!aij->donotstash) {
5950         if (roworiented) {
5951           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5952         } else {
5953           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5954         }
5955       }
5956     }
5957   }}
5958   PetscFunctionReturnVoid();
5959 }
5960 EXTERN_C_END
5961 
5962