xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 63f4a73203df8b9a6f68ded592474eee925d3e46)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
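/*
   A minimal usage sketch (not part of the original source; the global size N and the
   per-row nonzero counts 5 and 2 are placeholder assumptions): create a parallel AIJ
   matrix and call both preallocation routines as recommended above.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/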
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
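/*
   A minimal sketch of runtime type selection (an illustration, not taken from the
   original source): build the matrix with MatSetFromOptions() so that -mat_type aijcrl
   selects this type, or set it explicitly with MatSetType(A,MATAIJCRL).

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* honors -mat_type aijcrl */
*/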
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
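/*
   A calling sketch (an assumption based on the signature below, not taken from the
   original source): gseq is a fully assembled MATSEQAIJ matrix that only needs to be
   valid on process 0, and mlocal is the number of rows this process should own (the
   sum over all processes must equal the global size).

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... change the numerical values of gseq on process 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/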
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but it is fast to access.
354 */
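/*
   A lookup sketch for the two storage variants (illustrative only; gcol is an assumed
   global column index): the stored values are shifted by one so that zero can mean
   "not present", hence

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   and lcol < 0 afterwards means gcol does not occur in the off-diagonal block.
*/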
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal, we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which
865      would allocate new memory each time, causing many mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the VecScatterEnd() on the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: symmetric diagonal block */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
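
/*
   Rough sketch of how the binary dump above is normally driven (the variable names are
   illustrative, not part of this file): open a binary viewer and call MatView().  The routine
   writes a 4-entry header (MAT_FILE_CLASSID, M, N, total number of nonzeros), then all row
   lengths, then all column indices, then all values, streaming each rank's data through
   process 0 with the flow-control helpers.

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);            (dispatches here for a binary viewer)
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The resulting file can be read back with MatLoad().
*/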
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscInt   *inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
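
/*
   For ASCII, draw, or socket viewers on more than one process, the routine above first gathers
   the whole matrix onto process 0 as a temporary MATMPIAIJ (refusing matrices with more than
   1024 rows unless -mat_ascii_output_large is given) and then views the gathered matrix, which
   lives entirely in the diagonal block on process 0, through a singleton viewer.  A rough
   sketch of requesting the summary format instead, which returns early and avoids the gather:

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/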
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
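
/*
   Each "local" sweep above works on the modified right-hand side

       bb1 = bb - B*x_off

   where B is the off-diagonal block and x_off holds the scattered off-process entries of x,
   and then applies the sequential SOR kernel of the diagonal block A.  Only the SOR_LOCAL_*
   variants (block Jacobi with SOR inside each block) and Eisenstat's trick are supported; a
   true parallel SOR raises PETSC_ERR_SUP.  This is typically reached through the SOR
   preconditioner, e.g. -pc_type sor on a MATMPIAIJ operator.
*/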
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   MPI_Comm       comm;
1725   PetscInt       first,local_rowsize,local_colsize;
1726   const PetscInt *rows;
1727   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1732   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1733   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1734   /* collect the global row permutation and invert it */
1735   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1736   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1737   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1738   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1739   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1740   /* get the local target indices */
1741   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1743   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1744   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1745   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1746   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1747   /* the column permutation is so much easier;
1748      make a local version of 'colp' and invert it */
1749   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1750   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1751   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1753   /* now we just get the submatrix */
1754   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1755   /* clean up */
1756   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1757   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1764 {
1765   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1766   Mat            A = mat->A,B = mat->B;
1767   PetscErrorCode ierr;
1768   PetscReal      isend[5],irecv[5];
1769 
1770   PetscFunctionBegin;
1771   info->block_size     = 1.0;
1772   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1774   isend[3] = info->memory;  isend[4] = info->mallocs;
1775   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1776   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1777   isend[3] += info->memory;  isend[4] += info->mallocs;
1778   if (flag == MAT_LOCAL) {
1779     info->nz_used      = isend[0];
1780     info->nz_allocated = isend[1];
1781     info->nz_unneeded  = isend[2];
1782     info->memory       = isend[3];
1783     info->mallocs      = isend[4];
1784   } else if (flag == MAT_GLOBAL_MAX) {
1785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1793     info->nz_used      = irecv[0];
1794     info->nz_allocated = irecv[1];
1795     info->nz_unneeded  = irecv[2];
1796     info->memory       = irecv[3];
1797     info->mallocs      = irecv[4];
1798   }
1799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1800   info->fill_ratio_needed = 0;
1801   info->factor_mallocs    = 0;
1802   PetscFunctionReturn(0);
1803 }
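
/*
   The routine above sums the statistics of the diagonal and off-diagonal blocks and, for the
   global flavors, reduces them over the communicator (max or sum).  A small usage sketch
   (names illustrative):

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/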
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetOption_MPIAIJ"
1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1808 {
1809   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   switch (op) {
1814   case MAT_NEW_NONZERO_LOCATIONS:
1815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1816   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1817   case MAT_KEEP_NONZERO_PATTERN:
1818   case MAT_NEW_NONZERO_LOCATION_ERR:
1819   case MAT_USE_INODES:
1820   case MAT_IGNORE_ZERO_ENTRIES:
1821     MatCheckPreallocated(A,1);
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_ROW_ORIENTED:
1826     a->roworiented = flg;
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_NEW_DIAGONALS:
1831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1832     break;
1833   case MAT_IGNORE_OFF_PROC_ENTRIES:
1834     a->donotstash = flg;
1835     break;
1836   case MAT_SPD:
1837     A->spd_set                         = PETSC_TRUE;
1838     A->spd                             = flg;
1839     if (flg) {
1840       A->symmetric                     = PETSC_TRUE;
1841       A->structurally_symmetric        = PETSC_TRUE;
1842       A->symmetric_set                 = PETSC_TRUE;
1843       A->structurally_symmetric_set    = PETSC_TRUE;
1844     }
1845     break;
1846   case MAT_SYMMETRIC:
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_STRUCTURALLY_SYMMETRIC:
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_HERMITIAN:
1853     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1854     break;
1855   case MAT_SYMMETRY_ETERNAL:
1856     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1857     break;
1858   default:
1859     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1860   }
1861   PetscFunctionReturn(0);
1862 }
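
/*
   Most options are simply forwarded to both sequential blocks; MAT_ROW_ORIENTED and
   MAT_IGNORE_OFF_PROC_ENTRIES also update the parallel bookkeeping, and MAT_SPD additionally
   marks the matrix symmetric and structurally symmetric.  For example,

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   causes MatSetValues() contributions destined for another process to be dropped rather than
   stashed and communicated at assembly time.
*/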
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRow_MPIAIJ"
1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1867 {
1868   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1869   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1870   PetscErrorCode ierr;
1871   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1872   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1873   PetscInt       *cmap,*idx_p;
1874 
1875   PetscFunctionBegin;
1876   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1877   mat->getrowactive = PETSC_TRUE;
1878 
1879   if (!mat->rowvalues && (idx || v)) {
1880     /*
1881         allocate enough space to hold information from the longest row.
1882     */
1883     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1884     PetscInt   max = 1,tmp;
1885     for (i=0; i<matin->rmap->n; i++) {
1886       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1887       if (max < tmp) { max = tmp; }
1888     }
1889     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1890   }
1891 
1892   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1893   lrow = row - rstart;
1894 
1895   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1896   if (!v)   {pvA = 0; pvB = 0;}
1897   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1898   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1899   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1900   nztot = nzA + nzB;
1901 
1902   cmap  = mat->garray;
1903   if (v  || idx) {
1904     if (nztot) {
1905       /* Sort by increasing column numbers, assuming A and B already sorted */
1906       PetscInt imark = -1;
1907       if (v) {
1908         *v = v_p = mat->rowvalues;
1909         for (i=0; i<nzB; i++) {
1910           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1911           else break;
1912         }
1913         imark = i;
1914         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1915         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1916       }
1917       if (idx) {
1918         *idx = idx_p = mat->rowindices;
1919         if (imark > -1) {
1920           for (i=0; i<imark; i++) {
1921             idx_p[i] = cmap[cworkB[i]];
1922           }
1923         } else {
1924           for (i=0; i<nzB; i++) {
1925             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1926             else break;
1927           }
1928           imark = i;
1929         }
1930         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1931         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1932       }
1933     } else {
1934       if (idx) *idx = 0;
1935       if (v)   *v   = 0;
1936     }
1937   }
1938   *nz = nztot;
1939   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1940   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
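
/*
   MatGetRow() on a MATMPIAIJ is only legal for locally owned rows; the routine above merges
   the diagonal and off-diagonal parts of the row into work arrays ordered by global column.
   Sketch (row and A are illustrative):

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/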
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1947 {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1949 
1950   PetscFunctionBegin;
1951   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1952   aij->getrowactive = PETSC_FALSE;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatNorm_MPIAIJ"
1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1962   PetscErrorCode ierr;
1963   PetscInt       i,j,cstart = mat->cmap->rstart;
1964   PetscReal      sum = 0.0;
1965   MatScalar      *v;
1966 
1967   PetscFunctionBegin;
1968   if (aij->size == 1) {
1969     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1970   } else {
1971     if (type == NORM_FROBENIUS) {
1972       v = amat->a;
1973       for (i=0; i<amat->nz; i++) {
1974 #if defined(PETSC_USE_COMPLEX)
1975         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1976 #else
1977         sum += (*v)*(*v); v++;
1978 #endif
1979       }
1980       v = bmat->a;
1981       for (i=0; i<bmat->nz; i++) {
1982 #if defined(PETSC_USE_COMPLEX)
1983         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1984 #else
1985         sum += (*v)*(*v); v++;
1986 #endif
1987       }
1988       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990     } else if (type == NORM_1) { /* max column norm */
1991       PetscReal *tmp,*tmp2;
1992       PetscInt  *jj,*garray = aij->garray;
1993       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1994       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1995       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1996       *norm = 0.0;
1997       v = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011     } else if (type == NORM_INFINITY) { /* max row norm */
2012       PetscReal ntemp = 0.0;
2013       for (j=0; j<aij->A->rmap->n; j++) {
2014         v = amat->a + amat->i[j];
2015         sum = 0.0;
2016         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2017           sum += PetscAbsScalar(*v); v++;
2018         }
2019         v = bmat->a + bmat->i[j];
2020         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2021           sum += PetscAbsScalar(*v); v++;
2022         }
2023         if (sum > ntemp) ntemp = sum;
2024       }
2025       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2026     } else {
2027       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2028     }
2029   }
2030   PetscFunctionReturn(0);
2031 }
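
/*
   The reductions above implement the standard definitions: NORM_FROBENIUS sums |a_ij|^2 over
   both blocks and takes the square root of the global sum, NORM_1 is the largest column sum
   (accumulated into a length-N array and reduced with MPIU_SUM), and NORM_INFINITY is the
   largest local row sum reduced with MPIU_MAX.  The two-norm is not available for this type.
   For example:

       PetscReal nrm;
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/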
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatTranspose_MPIAIJ"
2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2036 {
2037   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2038   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2039   PetscErrorCode ierr;
2040   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2041   PetscInt       cstart=A->cmap->rstart,ncol;
2042   Mat            B;
2043   MatScalar      *array;
2044 
2045   PetscFunctionBegin;
2046   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2047 
2048   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2049   ai = Aloc->i; aj = Aloc->j;
2050   bi = Bloc->i; bj = Bloc->j;
2051   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2052     PetscInt *d_nnz,*g_nnz,*o_nnz;
2053     PetscSFNode *oloc;
2054     PetscSF sf;
2055 
2056     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2057     /* compute d_nnz for preallocation */
2058     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2059     for (i=0; i<ai[ma]; i++) {
2060       d_nnz[aj[i]] ++;
2061       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2062     }
2063     /* compute local off-diagonal contributions */
2064     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2065     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2066     /* map those to global */
2067     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2068     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2069     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2070     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2071     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2072     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2073 
2074     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2075     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2076     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2077     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2078     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2079     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2080   } else {
2081     B = *matout;
2082     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2083     for (i=0; i<ai[ma]; i++){
2084       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2085     }
2086   }
2087 
2088   /* copy over the A part */
2089   array = Aloc->a;
2090   row = A->rmap->rstart;
2091   for (i=0; i<ma; i++) {
2092     ncol = ai[i+1]-ai[i];
2093     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2094     row++; array += ncol; aj += ncol;
2095   }
2096   aj = Aloc->j;
2097   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2098 
2099   /* copy over the B part */
2100   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2101   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2102   array = Bloc->a;
2103   row = A->rmap->rstart;
2104   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2105   cols_tmp = cols;
2106   for (i=0; i<mb; i++) {
2107     ncol = bi[i+1]-bi[i];
2108     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2109     row++; array += ncol; cols_tmp += ncol;
2110   }
2111   ierr = PetscFree(cols);CHKERRQ(ierr);
2112 
2113   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2115   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2116     *matout = B;
2117   } else {
2118     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
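
/*
   The transpose is built by preallocating B from the column counts of the local blocks (a
   PetscSF reduction roughly converts the off-diagonal column counts into per-row counts on
   the owning processes) and then inserting each local row of A as a column of B with
   MatSetValues().  Sketch of the public entry point:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/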
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2125 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2126 {
2127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2128   Mat            a = aij->A,b = aij->B;
2129   PetscErrorCode ierr;
2130   PetscInt       s1,s2,s3;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2134   if (rr) {
2135     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2136     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2137     /* Overlap communication with computation. */
2138     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2139   }
2140   if (ll) {
2141     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2142     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2143     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2144   }
2145   /* scale  the diagonal block */
2146   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2147 
2148   if (rr) {
2149     /* Do a scatter end and then right scale the off-diagonal block */
2150     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2151     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2152   }
2153 
2154   PetscFunctionReturn(0);
2155 }
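
/*
   Left scaling is applied to both local blocks directly; for right scaling the needed
   off-process entries of rr are scattered into the work vector first, overlapping that
   communication with the scaling of the diagonal block.  The result is diag(ll)*A*diag(rr),
   as in (ll and rr illustrative):

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/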
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2159 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2160 {
2161   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatEqual_MPIAIJ"
2171 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2172 {
2173   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2174   Mat            a,b,c,d;
2175   PetscBool      flg;
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   a = matA->A; b = matA->B;
2180   c = matB->A; d = matB->B;
2181 
2182   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2183   if (flg) {
2184     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2185   }
2186   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatCopy_MPIAIJ"
2192 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2193 {
2194   PetscErrorCode ierr;
2195   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2196   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2197 
2198   PetscFunctionBegin;
2199   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2200   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2201     /* Because of the column compression in the off-process part of the matrix a->B,
2202        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2203        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2204        could be provided by first uncompressing the a->B matrices and then copying the
2205        submatrices. */
2206     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2207   } else {
2208     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2209     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2210   }
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatSetUp_MPIAIJ"
2216 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2217 {
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2227 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2228 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2229 {
2230   PetscInt          i,m=Y->rmap->N;
2231   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2232   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2233   const PetscInt    *xi = x->i,*yi = y->i;
2234 
2235   PetscFunctionBegin;
2236   /* Set the number of nonzeros in the new matrix */
2237   for (i=0; i<m; i++) {
2238     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2239     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2240     nnz[i] = 0;
2241     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2242       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2243       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2244       nnz[i]++;
2245     }
2246     for (; k<nzy; k++) nnz[i]++;
2247   }
2248   PetscFunctionReturn(0);
2249 }
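
/*
   The helper above merges the (sorted) column indices of each row of X and Y in global
   numbering, via the supplied local-to-global maps, so that nnz[i] ends up as the size of
   the union of the two sparsity patterns of row i.  It is used below to preallocate the
   off-diagonal block when Y = Y + a*X must grow its nonzero pattern.
*/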
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatAXPY_MPIAIJ"
2253 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2254 {
2255   PetscErrorCode ierr;
2256   PetscInt       i;
2257   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2258   PetscBLASInt   bnz,one=1;
2259   Mat_SeqAIJ     *x,*y;
2260 
2261   PetscFunctionBegin;
2262   if (str == SAME_NONZERO_PATTERN) {
2263     PetscScalar alpha = a;
2264     x = (Mat_SeqAIJ *)xx->A->data;
2265     y = (Mat_SeqAIJ *)yy->A->data;
2266     bnz = PetscBLASIntCast(x->nz);
2267     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2268     x = (Mat_SeqAIJ *)xx->B->data;
2269     y = (Mat_SeqAIJ *)yy->B->data;
2270     bnz = PetscBLASIntCast(x->nz);
2271     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2272   } else if (str == SUBSET_NONZERO_PATTERN) {
2273     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2274 
2275     x = (Mat_SeqAIJ *)xx->B->data;
2276     y = (Mat_SeqAIJ *)yy->B->data;
2277     if (y->xtoy && y->XtoY != xx->B) {
2278       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2279       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2280     }
2281     if (!y->xtoy) { /* get xtoy */
2282       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2283       y->XtoY = xx->B;
2284       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2285     }
2286     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2287   } else {
2288     Mat B;
2289     PetscInt *nnz_d,*nnz_o;
2290     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2291     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2292     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2293     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2294     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2295     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2296     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2297     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2298     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2299     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2300     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2301     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2302     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2303     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2304   }
2305   PetscFunctionReturn(0);
2306 }
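
/*
   Usage sketch for the three cases handled above (names illustrative):

       ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        straight BLAS axpy on both blocks
       ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      X's pattern contained in Y's
       ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y is rebuilt with the merged pattern
*/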
2307 
2308 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatConjugate_MPIAIJ"
2312 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2313 {
2314 #if defined(PETSC_USE_COMPLEX)
2315   PetscErrorCode ierr;
2316   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2320   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2321 #else
2322   PetscFunctionBegin;
2323 #endif
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatRealPart_MPIAIJ"
2329 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2330 {
2331   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2336   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2342 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2343 {
2344   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2349   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 #if defined(PETSC_HAVE_PBGL)
2354 
2355 #include <boost/parallel/mpi/bsp_process_group.hpp>
2356 #include <boost/graph/distributed/ilu_default_graph.hpp>
2357 #include <boost/graph/distributed/ilu_0_block.hpp>
2358 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2359 #include <boost/graph/distributed/petsc/interface.hpp>
2360 #include <boost/multi_array.hpp>
2361 #include <boost/parallel/distributed_property_map.hpp>
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2365 /*
2366   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2367 */
2368 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2369 {
2370   namespace petsc = boost::distributed::petsc;
2371 
2372   namespace graph_dist = boost::graph::distributed;
2373   using boost::graph::distributed::ilu_default::process_group_type;
2374   using boost::graph::ilu_permuted;
2375 
2376   PetscBool       row_identity, col_identity;
2377   PetscContainer  c;
2378   PetscInt        m, n, M, N;
2379   PetscErrorCode  ierr;
2380 
2381   PetscFunctionBegin;
2382   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2383   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2384   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2385   if (!row_identity || !col_identity) {
2386     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2387   }
2388 
2389   process_group_type pg;
2390   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2391   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2392   lgraph_type&   level_graph = *lgraph_p;
2393   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2394 
2395   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2396   ilu_permuted(level_graph);
2397 
2398   /* put together the new matrix */
2399   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2400   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2401   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2402   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2403   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2404   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2405   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2406   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2407 
2408   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2409   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2410   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2411   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2417 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2418 {
2419   PetscFunctionBegin;
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatSolve_MPIAIJ"
2425 /*
2426   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2427 */
2428 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2429 {
2430   namespace graph_dist = boost::graph::distributed;
2431 
2432   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2433   lgraph_type*   lgraph_p;
2434   PetscContainer c;
2435   PetscErrorCode ierr;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2439   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2440   ierr = VecCopy(b, x);CHKERRQ(ierr);
2441 
2442   PetscScalar* array_x;
2443   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2444   PetscInt sx;
2445   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2446 
2447   PetscScalar* array_b;
2448   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2449   PetscInt sb;
2450   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2451 
2452   lgraph_type&   level_graph = *lgraph_p;
2453   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2454 
2455   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2456   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2457                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2458 
2459   typedef boost::iterator_property_map<array_ref_type::iterator,
2460                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2461   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2462                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2463 
2464   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2465 
2466   PetscFunctionReturn(0);
2467 }
2468 #endif
2469 
2470 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2471   PetscInt       nzlocal,nsends,nrecvs;
2472   PetscMPIInt    *send_rank,*recv_rank;
2473   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2474   PetscScalar    *sbuf_a,**rbuf_a;
2475   PetscErrorCode (*Destroy)(Mat);
2476 } Mat_Redundant;
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2480 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2481 {
2482   PetscErrorCode       ierr;
2483   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2484   PetscInt             i;
2485 
2486   PetscFunctionBegin;
2487   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2488   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2489   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2490   for (i=0; i<redund->nrecvs; i++){
2491     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2492     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2493   }
2494   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2495   ierr = PetscFree(redund);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatDestroy_MatRedundant"
2501 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2502 {
2503   PetscErrorCode  ierr;
2504   PetscContainer  container;
2505   Mat_Redundant   *redund=PETSC_NULL;
2506 
2507   PetscFunctionBegin;
2508   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2510   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511   A->ops->destroy = redund->Destroy;
2512   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2513   if (A->ops->destroy) {
2514     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2515   }
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2521 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2522 {
2523   PetscMPIInt    rank,size;
2524   MPI_Comm       comm=((PetscObject)mat)->comm;
2525   PetscErrorCode ierr;
2526   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2527   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2528   PetscInt       *rowrange=mat->rmap->range;
2529   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2530   Mat            A=aij->A,B=aij->B,C=*matredundant;
2531   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2532   PetscScalar    *sbuf_a;
2533   PetscInt       nzlocal=a->nz+b->nz;
2534   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2535   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2536   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2537   MatScalar      *aworkA,*aworkB;
2538   PetscScalar    *vals;
2539   PetscMPIInt    tag1,tag2,tag3,imdex;
2540   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2541                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2542   MPI_Status     recv_status,*send_status;
2543   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2544   PetscInt       **rbuf_j=PETSC_NULL;
2545   PetscScalar    **rbuf_a=PETSC_NULL;
2546   Mat_Redundant  *redund=PETSC_NULL;
2547   PetscContainer container;
2548 
2549   PetscFunctionBegin;
2550   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2551   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2552 
2553   if (reuse == MAT_REUSE_MATRIX) {
2554     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2555     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2556     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2557     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2558     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2559     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2560     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2561     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2562 
2563     nsends    = redund->nsends;
2564     nrecvs    = redund->nrecvs;
2565     send_rank = redund->send_rank;
2566     recv_rank = redund->recv_rank;
2567     sbuf_nz   = redund->sbuf_nz;
2568     rbuf_nz   = redund->rbuf_nz;
2569     sbuf_j    = redund->sbuf_j;
2570     sbuf_a    = redund->sbuf_a;
2571     rbuf_j    = redund->rbuf_j;
2572     rbuf_a    = redund->rbuf_a;
2573   }
2574 
2575   if (reuse == MAT_INITIAL_MATRIX){
2576     PetscMPIInt  subrank,subsize;
2577     PetscInt     nleftover,np_subcomm;
2578     /* get the destination processors' id send_rank, nsends and nrecvs */
2579     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2580     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2581     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2582     np_subcomm = size/nsubcomm;
2583     nleftover  = size - nsubcomm*np_subcomm;
2584     nsends = 0; nrecvs = 0;
2585     for (i=0; i<size; i++){ /* i=rank*/
2586       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2587         send_rank[nsends] = i; nsends++;
2588         recv_rank[nrecvs++] = i;
2589       }
2590     }
2591     if (rank >= size - nleftover){/* this proc is a leftover processor */
2592       i = size-nleftover-1;
2593       j = 0;
2594       while (j < nsubcomm - nleftover){
2595         send_rank[nsends++] = i;
2596         i--; j++;
2597       }
2598     }
2599 
2600     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2601       for (i=0; i<nleftover; i++){
2602         recv_rank[nrecvs++] = size-nleftover+i;
2603       }
2604     }
2605 
2606     /* allocate sbuf_j, sbuf_a */
2607     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2608     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2609     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2610   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2611 
2612   /* copy mat's local entries into the buffers */
2613   if (reuse == MAT_INITIAL_MATRIX){
2614     rownz_max = 0;
2615     rptr = sbuf_j;
2616     cols = sbuf_j + rend-rstart + 1;
2617     vals = sbuf_a;
2618     rptr[0] = 0;
2619     for (i=0; i<rend-rstart; i++){
2620       row = i + rstart;
2621       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2622       ncols  = nzA + nzB;
2623       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2624       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2625       /* load the column indices for this row into cols */
2626       lwrite = 0;
2627       for (l=0; l<nzB; l++) {
2628         if ((ctmp = bmap[cworkB[l]]) < cstart){
2629           vals[lwrite]   = aworkB[l];
2630           cols[lwrite++] = ctmp;
2631         }
2632       }
2633       for (l=0; l<nzA; l++){
2634         vals[lwrite]   = aworkA[l];
2635         cols[lwrite++] = cstart + cworkA[l];
2636       }
2637       for (l=0; l<nzB; l++) {
2638         if ((ctmp = bmap[cworkB[l]]) >= cend){
2639           vals[lwrite]   = aworkB[l];
2640           cols[lwrite++] = ctmp;
2641         }
2642       }
2643       vals += ncols;
2644       cols += ncols;
2645       rptr[i+1] = rptr[i] + ncols;
2646       if (rownz_max < ncols) rownz_max = ncols;
2647     }
2648     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2649   } else { /* only copy matrix values into sbuf_a */
2650     rptr = sbuf_j;
2651     vals = sbuf_a;
2652     rptr[0] = 0;
2653     for (i=0; i<rend-rstart; i++){
2654       row = i + rstart;
2655       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2656       ncols  = nzA + nzB;
2657       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2658       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2659       lwrite = 0;
2660       for (l=0; l<nzB; l++) {
2661         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2662       }
2663       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2664       for (l=0; l<nzB; l++) {
2665         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2666       }
2667       vals += ncols;
2668       rptr[i+1] = rptr[i] + ncols;
2669     }
2670   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2671 
2672   /* send nzlocal to others, and recv other's nzlocal */
2673   /*--------------------------------------------------*/
2674   if (reuse == MAT_INITIAL_MATRIX){
2675     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2676     s_waits2 = s_waits3 + nsends;
2677     s_waits1 = s_waits2 + nsends;
2678     r_waits1 = s_waits1 + nsends;
2679     r_waits2 = r_waits1 + nrecvs;
2680     r_waits3 = r_waits2 + nrecvs;
2681   } else {
2682     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2683     r_waits3 = s_waits3 + nsends;
2684   }
2685 
2686   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2687   if (reuse == MAT_INITIAL_MATRIX){
2688     /* get new tags to keep the communication clean */
2689     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2690     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2691     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2692 
2693     /* post receives of other's nzlocal */
2694     for (i=0; i<nrecvs; i++){
2695       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2696     }
2697     /* send nzlocal to others */
2698     for (i=0; i<nsends; i++){
2699       sbuf_nz[i] = nzlocal;
2700       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2701     }
2702     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2703     count = nrecvs;
2704     while (count) {
2705       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2706       recv_rank[imdex] = recv_status.MPI_SOURCE;
2707       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2708       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2709 
2710       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2711       rbuf_nz[imdex] += i + 2;
2712       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2713       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2714       count--;
2715     }
2716     /* wait on sends of nzlocal */
2717     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2718     /* send mat->i,j to others, and recv from others */
2719     /*------------------------------------------------*/
2720     for (i=0; i<nsends; i++){
2721       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2722       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2723     }
2724     /* wait on receives of mat->i,j */
2725     /*------------------------------*/
2726     count = nrecvs;
2727     while (count) {
2728       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2729       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2730       count--;
2731     }
2732     /* wait on sends of mat->i,j */
2733     /*---------------------------*/
2734     if (nsends) {
2735       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2736     }
2737   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2738 
2739   /* post receives, send and receive mat->a */
2740   /*----------------------------------------*/
2741   for (imdex=0; imdex<nrecvs; imdex++) {
2742     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2743   }
2744   for (i=0; i<nsends; i++){
2745     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2746   }
2747   count = nrecvs;
2748   while (count) {
2749     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2750     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2751     count--;
2752   }
2753   if (nsends) {
2754     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2755   }
2756 
2757   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2758 
2759   /* create redundant matrix */
2760   /*-------------------------*/
2761   if (reuse == MAT_INITIAL_MATRIX){
2762     /* compute rownz_max for preallocation */
2763     for (imdex=0; imdex<nrecvs; imdex++){
2764       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2765       rptr = rbuf_j[imdex];
2766       for (i=0; i<j; i++){
2767         ncols = rptr[i+1] - rptr[i];
2768         if (rownz_max < ncols) rownz_max = ncols;
2769       }
2770     }
2771 
2772     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2773     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2774     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2775     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2776     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2777     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2778   } else {
2779     C = *matredundant;
2780   }
2781 
2782   /* insert local matrix entries */
2783   rptr = sbuf_j;
2784   cols = sbuf_j + rend-rstart + 1;
2785   vals = sbuf_a;
2786   for (i=0; i<rend-rstart; i++){
2787     row   = i + rstart;
2788     ncols = rptr[i+1] - rptr[i];
2789     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2790     vals += ncols;
2791     cols += ncols;
2792   }
2793   /* insert received matrix entries */
2794   for (imdex=0; imdex<nrecvs; imdex++){
2795     rstart = rowrange[recv_rank[imdex]];
2796     rend   = rowrange[recv_rank[imdex]+1];
2797     rptr = rbuf_j[imdex];
2798     cols = rbuf_j[imdex] + rend-rstart + 1;
2799     vals = rbuf_a[imdex];
2800     for (i=0; i<rend-rstart; i++){
2801       row   = i + rstart;
2802       ncols = rptr[i+1] - rptr[i];
2803       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2804       vals += ncols;
2805       cols += ncols;
2806     }
2807   }
2808   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2809   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2811   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2812   if (reuse == MAT_INITIAL_MATRIX) {
2813     PetscContainer container;
2814     *matredundant = C;
2815     /* create a supporting struct and attach it to C for reuse */
2816     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2817     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2818     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2819     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2820     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2821     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2822 
2823     redund->nzlocal = nzlocal;
2824     redund->nsends  = nsends;
2825     redund->nrecvs  = nrecvs;
2826     redund->send_rank = send_rank;
2827     redund->recv_rank = recv_rank;
2828     redund->sbuf_nz = sbuf_nz;
2829     redund->rbuf_nz = rbuf_nz;
2830     redund->sbuf_j  = sbuf_j;
2831     redund->sbuf_a  = sbuf_a;
2832     redund->rbuf_j  = rbuf_j;
2833     redund->rbuf_a  = rbuf_a;
2834 
2835     redund->Destroy = C->ops->destroy;
2836     C->ops->destroy = MatDestroy_MatRedundant;
2837   }
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2843 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2844 {
2845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2846   PetscErrorCode ierr;
2847   PetscInt       i,*idxb = 0;
2848   PetscScalar    *va,*vb;
2849   Vec            vtmp;
2850 
2851   PetscFunctionBegin;
2852   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2853   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2854   if (idx) {
2855     for (i=0; i<A->rmap->n; i++) {
2856       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2857     }
2858   }
2859 
2860   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2861   if (idx) {
2862     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2863   }
2864   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2865   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2866 
2867   for (i=0; i<A->rmap->n; i++){
2868     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2869       va[i] = vb[i];
2870       if (idx) idx[i] = a->garray[idxb[i]];
2871     }
2872   }
2873 
2874   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2875   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2876   ierr = PetscFree(idxb);CHKERRQ(ierr);
2877   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2883 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2884 {
2885   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2886   PetscErrorCode ierr;
2887   PetscInt       i,*idxb = 0;
2888   PetscScalar    *va,*vb;
2889   Vec            vtmp;
2890 
2891   PetscFunctionBegin;
2892   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2893   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2894   if (idx) {
2895     for (i=0; i<A->rmap->n; i++) {
2896       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2897     }
2898   }
2899 
2900   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2901   if (idx) {
2902     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2903   }
2904   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2905   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2906 
2907   for (i=0; i<A->rmap->n; i++){
2908     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2909       va[i] = vb[i];
2910       if (idx) idx[i] = a->garray[idxb[i]];
2911     }
2912   }
2913 
2914   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2916   ierr = PetscFree(idxb);CHKERRQ(ierr);
2917   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2923 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2926   PetscInt       n      = A->rmap->n;
2927   PetscInt       cstart = A->cmap->rstart;
2928   PetscInt      *cmap   = mat->garray;
2929   PetscInt      *diagIdx, *offdiagIdx;
2930   Vec            diagV, offdiagV;
2931   PetscScalar   *a, *diagA, *offdiagA;
2932   PetscInt       r;
2933   PetscErrorCode ierr;
2934 
2935   PetscFunctionBegin;
2936   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2938   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2940   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2941   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2942   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2943   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2944   for (r = 0; r < n; ++r) {
2945     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2946       a[r]   = diagA[r];
2947       idx[r] = cstart + diagIdx[r];
2948     } else {
2949       a[r]   = offdiagA[r];
2950       idx[r] = cmap[offdiagIdx[r]];
2951     }
2952   }
2953   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2955   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2956   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2957   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2958   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2964 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2965 {
2966   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2967   PetscInt       n      = A->rmap->n;
2968   PetscInt       cstart = A->cmap->rstart;
2969   PetscInt      *cmap   = mat->garray;
2970   PetscInt      *diagIdx, *offdiagIdx;
2971   Vec            diagV, offdiagV;
2972   PetscScalar   *a, *diagA, *offdiagA;
2973   PetscInt       r;
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2979   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2981   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2982   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2983   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2984   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2985   for (r = 0; r < n; ++r) {
2986     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2987       a[r]   = diagA[r];
2988       idx[r] = cstart + diagIdx[r];
2989     } else {
2990       a[r]   = offdiagA[r];
2991       idx[r] = cmap[offdiagIdx[r]];
2992     }
2993   }
2994   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2996   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2997   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2998   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2999   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3000   PetscFunctionReturn(0);
3001 }
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3005 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3006 {
3007   PetscErrorCode ierr;
3008   Mat            *dummy;
3009 
3010   PetscFunctionBegin;
3011   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3012   *newmat = *dummy;
3013   ierr = PetscFree(dummy);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3018 
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3021 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3022 {
3023   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3033 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3034 {
3035   PetscErrorCode ierr;
3036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3037 
3038   PetscFunctionBegin;
3039   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3040   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3041   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3042   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 /* -------------------------------------------------------------------*/
3047 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3048        MatGetRow_MPIAIJ,
3049        MatRestoreRow_MPIAIJ,
3050        MatMult_MPIAIJ,
3051 /* 4*/ MatMultAdd_MPIAIJ,
3052        MatMultTranspose_MPIAIJ,
3053        MatMultTransposeAdd_MPIAIJ,
3054 #ifdef PETSC_HAVE_PBGL
3055        MatSolve_MPIAIJ,
3056 #else
3057        0,
3058 #endif
3059        0,
3060        0,
3061 /*10*/ 0,
3062        0,
3063        0,
3064        MatSOR_MPIAIJ,
3065        MatTranspose_MPIAIJ,
3066 /*15*/ MatGetInfo_MPIAIJ,
3067        MatEqual_MPIAIJ,
3068        MatGetDiagonal_MPIAIJ,
3069        MatDiagonalScale_MPIAIJ,
3070        MatNorm_MPIAIJ,
3071 /*20*/ MatAssemblyBegin_MPIAIJ,
3072        MatAssemblyEnd_MPIAIJ,
3073        MatSetOption_MPIAIJ,
3074        MatZeroEntries_MPIAIJ,
3075 /*24*/ MatZeroRows_MPIAIJ,
3076        0,
3077 #ifdef PETSC_HAVE_PBGL
3078        0,
3079 #else
3080        0,
3081 #endif
3082        0,
3083        0,
3084 /*29*/ MatSetUp_MPIAIJ,
3085 #ifdef PETSC_HAVE_PBGL
3086        0,
3087 #else
3088        0,
3089 #endif
3090        0,
3091        0,
3092        0,
3093 /*34*/ MatDuplicate_MPIAIJ,
3094        0,
3095        0,
3096        0,
3097        0,
3098 /*39*/ MatAXPY_MPIAIJ,
3099        MatGetSubMatrices_MPIAIJ,
3100        MatIncreaseOverlap_MPIAIJ,
3101        MatGetValues_MPIAIJ,
3102        MatCopy_MPIAIJ,
3103 /*44*/ MatGetRowMax_MPIAIJ,
3104        MatScale_MPIAIJ,
3105        0,
3106        0,
3107        MatZeroRowsColumns_MPIAIJ,
3108 /*49*/ MatSetRandom_MPIAIJ,
3109        0,
3110        0,
3111        0,
3112        0,
3113 /*54*/ MatFDColoringCreate_MPIAIJ,
3114        0,
3115        MatSetUnfactored_MPIAIJ,
3116        0, /* MatPermute_MPIAIJ, impl currently broken */
3117        0,
3118 /*59*/ MatGetSubMatrix_MPIAIJ,
3119        MatDestroy_MPIAIJ,
3120        MatView_MPIAIJ,
3121        0,
3122        0,
3123 /*64*/ 0,
3124        0,
3125        0,
3126        0,
3127        0,
3128 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3129        MatGetRowMinAbs_MPIAIJ,
3130        0,
3131        MatSetColoring_MPIAIJ,
3132 #if defined(PETSC_HAVE_ADIC)
3133        MatSetValuesAdic_MPIAIJ,
3134 #else
3135        0,
3136 #endif
3137        MatSetValuesAdifor_MPIAIJ,
3138 /*75*/ MatFDColoringApply_AIJ,
3139        0,
3140        0,
3141        0,
3142        MatFindZeroDiagonals_MPIAIJ,
3143 /*80*/ 0,
3144        0,
3145        0,
3146 /*83*/ MatLoad_MPIAIJ,
3147        0,
3148        0,
3149        0,
3150        0,
3151        0,
3152 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3153        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3154        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3155        MatPtAP_Basic,
3156        MatPtAPSymbolic_MPIAIJ,
3157 /*94*/ MatPtAPNumeric_MPIAIJ,
3158        0,
3159        0,
3160        0,
3161        0,
3162 /*99*/ 0,
3163        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3164        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3165        MatConjugate_MPIAIJ,
3166        0,
3167 /*104*/MatSetValuesRow_MPIAIJ,
3168        MatRealPart_MPIAIJ,
3169        MatImaginaryPart_MPIAIJ,
3170        0,
3171        0,
3172 /*109*/0,
3173        MatGetRedundantMatrix_MPIAIJ,
3174        MatGetRowMin_MPIAIJ,
3175        0,
3176        0,
3177 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3178        0,
3179        0,
3180        0,
3181        0,
3182 /*119*/0,
3183        0,
3184        0,
3185        0,
3186        MatGetMultiProcBlock_MPIAIJ,
3187 /*124*/MatFindNonzeroRows_MPIAIJ,
3188        MatGetColumnNorms_MPIAIJ,
3189        MatInvertBlockDiagonal_MPIAIJ,
3190        0,
3191        MatGetSubMatricesParallel_MPIAIJ,
3192 /*129*/0,
3193        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3194        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3195        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3196        0,
3197 /*134*/0,
3198        0,
3199        0,
3200        0,
3201        0
3202 };
3203 
3204 /* ----------------------------------------------------------------------------------------*/
3205 
3206 EXTERN_C_BEGIN
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3209 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3210 {
3211   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3212   PetscErrorCode ierr;
3213 
3214   PetscFunctionBegin;
3215   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3216   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219 EXTERN_C_END
3220 
3221 EXTERN_C_BEGIN
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3224 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3225 {
3226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3231   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3232   PetscFunctionReturn(0);
3233 }
3234 EXTERN_C_END
3235 
3236 EXTERN_C_BEGIN
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3239 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3240 {
3241   Mat_MPIAIJ     *b;
3242   PetscErrorCode ierr;
3243   PetscInt       i;
3244   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3245 
3246   PetscFunctionBegin;
3247   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3248   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3249   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3250   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3251   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3252   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3253 
3254   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3255   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3256   if (d_nnz) {
3257     for (i=0; i<B->rmap->n; i++) {
3258       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3259     }
3260   }
3261   if (o_nnz) {
3262     for (i=0; i<B->rmap->n; i++) {
3263       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3264     }
3265   }
3266   b = (Mat_MPIAIJ*)B->data;
3267 
3268   if (!B->preallocated) {
3269     /* Explicitly create 2 MATSEQAIJ matrices. */
3270     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3271     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3272     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3273     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3274     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3275     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3276     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3277     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3278     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3279     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3280   }
3281 
3282   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3283   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3284   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3285   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3286   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3287   B->preallocated = PETSC_TRUE;
3288   PetscFunctionReturn(0);
3289 }
3290 EXTERN_C_END
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3294 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3295 {
3296   Mat            mat;
3297   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3298   PetscErrorCode ierr;
3299 
3300   PetscFunctionBegin;
3301   *newmat       = 0;
3302   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3303   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3304   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3305   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3306   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3307   a    = (Mat_MPIAIJ*)mat->data;
3308 
3309   mat->factortype    = matin->factortype;
3310   mat->rmap->bs      = matin->rmap->bs;
3311   mat->cmap->bs      = matin->cmap->bs;
3312   mat->assembled    = PETSC_TRUE;
3313   mat->insertmode   = NOT_SET_VALUES;
3314   mat->preallocated = PETSC_TRUE;
3315 
3316   a->size           = oldmat->size;
3317   a->rank           = oldmat->rank;
3318   a->donotstash     = oldmat->donotstash;
3319   a->roworiented    = oldmat->roworiented;
3320   a->rowindices     = 0;
3321   a->rowvalues      = 0;
3322   a->getrowactive   = PETSC_FALSE;
3323 
3324   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3325   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3326 
3327   if (oldmat->colmap) {
3328 #if defined (PETSC_USE_CTABLE)
3329     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3330 #else
3331     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3332     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3333     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3334 #endif
3335   } else a->colmap = 0;
3336   if (oldmat->garray) {
3337     PetscInt len;
3338     len  = oldmat->B->cmap->n;
3339     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3340     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3341     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3342   } else a->garray = 0;
3343 
3344   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3346   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3347   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3348   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3349   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3350   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3351   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3352   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3353   *newmat = mat;
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatLoad_MPIAIJ"
3361 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3362 {
3363   PetscScalar    *vals,*svals;
3364   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3365   PetscErrorCode ierr;
3366   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3367   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3368   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3369   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3370   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3371   int            fd;
3372   PetscInt       bs = 1;
3373 
3374   PetscFunctionBegin;
3375   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3376   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3377   if (!rank) {
3378     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3379     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3380     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3381   }
3382 
3383   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3384   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3385   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3386 
3387   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3388 
3389   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3390   M = header[1]; N = header[2];
3391   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3392   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3393   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3394 
3395   /* If global sizes are set, check if they are consistent with that given in the file */
3396   if (sizesset) {
3397     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3398   }
3399   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3400   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3401 
3402   /* determine ownership of all (block) rows */
3403   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3404   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3405   else m = newMat->rmap->n; /* Set by user */
3406 
3407   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3408   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3409 
3410   /* First process needs enough room for process with most rows */
3411   if (!rank) {
3412     mmax = rowners[1];
3413     for (i=2; i<=size; i++) {
3414       mmax = PetscMax(mmax, rowners[i]);
3415     }
3416   } else mmax = m;
3417 
3418   rowners[0] = 0;
3419   for (i=2; i<=size; i++) {
3420     rowners[i] += rowners[i-1];
3421   }
3422   rstart = rowners[rank];
3423   rend   = rowners[rank+1];
3424 
3425   /* distribute row lengths to all processors */
3426   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3427   if (!rank) {
3428     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3429     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3430     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3431     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3432     for (j=0; j<m; j++) {
3433       procsnz[0] += ourlens[j];
3434     }
3435     for (i=1; i<size; i++) {
3436       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3437       /* calculate the number of nonzeros on each processor */
3438       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3439         procsnz[i] += rowlengths[j];
3440       }
3441       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3442     }
3443     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3444   } else {
3445     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446   }
3447 
3448   if (!rank) {
3449     /* determine max buffer needed and allocate it */
3450     maxnz = 0;
3451     for (i=0; i<size; i++) {
3452       maxnz = PetscMax(maxnz,procsnz[i]);
3453     }
3454     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3455 
3456     /* read in my part of the matrix column indices  */
3457     nz   = procsnz[0];
3458     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3459     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3460 
3461     /* read in everyone else's and ship them off */
3462     for (i=1; i<size; i++) {
3463       nz     = procsnz[i];
3464       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3465       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3466     }
3467     ierr = PetscFree(cols);CHKERRQ(ierr);
3468   } else {
3469     /* determine buffer space needed for message */
3470     nz = 0;
3471     for (i=0; i<m; i++) {
3472       nz += ourlens[i];
3473     }
3474     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3475 
3476     /* receive message of column indices*/
3477     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3478   }
3479 
3480   /* determine column ownership if matrix is not square */
3481   if (N != M) {
3482     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3483     else n = newMat->cmap->n;
3484     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3485     cstart = cend - n;
3486   } else {
3487     cstart = rstart;
3488     cend   = rend;
3489     n      = cend - cstart;
3490   }
3491 
3492   /* loop over local rows, determining number of off diagonal entries */
3493   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3494   jj = 0;
3495   for (i=0; i<m; i++) {
3496     for (j=0; j<ourlens[i]; j++) {
3497       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3498       jj++;
3499     }
3500   }
3501 
3502   for (i=0; i<m; i++) {
3503     ourlens[i] -= offlens[i];
3504   }
3505   if (!sizesset) {
3506     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3507   }
3508 
3509   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3510 
3511   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3512 
3513   for (i=0; i<m; i++) {
3514     ourlens[i] += offlens[i];
3515   }
3516 
3517   if (!rank) {
3518     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3519 
3520     /* read in my part of the matrix numerical values  */
3521     nz   = procsnz[0];
3522     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3523 
3524     /* insert into matrix */
3525     jj      = rstart;
3526     smycols = mycols;
3527     svals   = vals;
3528     for (i=0; i<m; i++) {
3529       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3530       smycols += ourlens[i];
3531       svals   += ourlens[i];
3532       jj++;
3533     }
3534 
3535     /* read in other processors and ship out */
3536     for (i=1; i<size; i++) {
3537       nz     = procsnz[i];
3538       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3539       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3540     }
3541     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3542   } else {
3543     /* receive numeric values */
3544     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3545 
3546     /* receive message of values*/
3547     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3548 
3549     /* insert into matrix */
3550     jj      = rstart;
3551     smycols = mycols;
3552     svals   = vals;
3553     for (i=0; i<m; i++) {
3554       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3555       smycols += ourlens[i];
3556       svals   += ourlens[i];
3557       jj++;
3558     }
3559   }
3560   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3561   ierr = PetscFree(vals);CHKERRQ(ierr);
3562   ierr = PetscFree(mycols);CHKERRQ(ierr);
3563   ierr = PetscFree(rowners);CHKERRQ(ierr);
3564   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566 
3567   PetscFunctionReturn(0);
3568 }
3569 
3570 #undef __FUNCT__
3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3572 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3573 {
3574   PetscErrorCode ierr;
3575   IS             iscol_local;
3576   PetscInt       csize;
3577 
3578   PetscFunctionBegin;
3579   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3580   if (call == MAT_REUSE_MATRIX) {
3581     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3582     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3583   } else {
3584     PetscInt cbs;
3585     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3586     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3587     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3588   }
3589   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3590   if (call == MAT_INITIAL_MATRIX) {
3591     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3592     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3593   }
3594   PetscFunctionReturn(0);
3595 }
3596 
3597 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3598 #undef __FUNCT__
3599 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3600 /*
3601     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3602   and then the end result by concatenating the local matrices.
3603   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3604 
3605   Note: This requires a sequential iscol with all indices.
3606 */
3607 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3608 {
3609   PetscErrorCode ierr;
3610   PetscMPIInt    rank,size;
3611   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3612   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3613   PetscBool      allcolumns, colflag;
3614   Mat            M,Mreuse;
3615   MatScalar      *vwork,*aa;
3616   MPI_Comm       comm = ((PetscObject)mat)->comm;
3617   Mat_SeqAIJ     *aij;
3618 
3619 
3620   PetscFunctionBegin;
3621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3623 
3624   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3625   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3626   if (colflag && ncol == mat->cmap->N){
3627     allcolumns = PETSC_TRUE;
3628   } else {
3629     allcolumns = PETSC_FALSE;
3630   }
3631   if (call ==  MAT_REUSE_MATRIX) {
3632     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3633     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3634     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3635   } else {
3636     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3637   }
3638 
3639   /*
3640       m - number of local rows
3641       n - number of columns (same on all processors)
3642       rstart - first row in new global matrix generated
3643   */
3644   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3645   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3646   if (call == MAT_INITIAL_MATRIX) {
3647     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3648     ii  = aij->i;
3649     jj  = aij->j;
3650 
3651     /*
3652         Determine the number of non-zeros in the diagonal and off-diagonal
3653         portions of the matrix in order to do correct preallocation
3654     */
3655 
3656     /* first get start and end of "diagonal" columns */
3657     if (csize == PETSC_DECIDE) {
3658       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3659       if (mglobal == n) { /* square matrix */
3660         nlocal = m;
3661       } else {
3662         nlocal = n/size + ((n % size) > rank);
3663       }
3664     } else {
3665       nlocal = csize;
3666     }
3667     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3668     rstart = rend - nlocal;
3669     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3670 
3671     /* next, compute all the lengths */
3672     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3673     olens = dlens + m;
3674     for (i=0; i<m; i++) {
3675       jend = ii[i+1] - ii[i];
3676       olen = 0;
3677       dlen = 0;
3678       for (j=0; j<jend; j++) {
3679         if (*jj < rstart || *jj >= rend) olen++;
3680         else dlen++;
3681         jj++;
3682       }
3683       olens[i] = olen;
3684       dlens[i] = dlen;
3685     }
3686     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3687     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3688     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3689     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3690     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3691     ierr = PetscFree(dlens);CHKERRQ(ierr);
3692   } else {
3693     PetscInt ml,nl;
3694 
3695     M = *newmat;
3696     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3697     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3698     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3699     /*
3700          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3701        rather than the slower MatSetValues().
3702     */
3703     M->was_assembled = PETSC_TRUE;
3704     M->assembled     = PETSC_FALSE;
3705   }
3706   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3707   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3708   ii  = aij->i;
3709   jj  = aij->j;
3710   aa  = aij->a;
3711   for (i=0; i<m; i++) {
3712     row   = rstart + i;
3713     nz    = ii[i+1] - ii[i];
3714     cwork = jj;     jj += nz;
3715     vwork = aa;     aa += nz;
3716     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3717   }
3718 
3719   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3720   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721   *newmat = M;
3722 
3723   /* save submatrix used in processor for next request */
3724   if (call ==  MAT_INITIAL_MATRIX) {
3725     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3726     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3727   }
3728 
3729   PetscFunctionReturn(0);
3730 }
3731 
3732 EXTERN_C_BEGIN
3733 #undef __FUNCT__
3734 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3735 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3736 {
3737   PetscInt       m,cstart, cend,j,nnz,i,d;
3738   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3739   const PetscInt *JJ;
3740   PetscScalar    *values;
3741   PetscErrorCode ierr;
3742 
3743   PetscFunctionBegin;
3744   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3745 
3746   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3747   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3748   m      = B->rmap->n;
3749   cstart = B->cmap->rstart;
3750   cend   = B->cmap->rend;
3751   rstart = B->rmap->rstart;
3752 
3753   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3754 
3755 #if defined(PETSC_USE_DEBUG)
3756   for (i=0; i<m; i++) {
3757     nnz     = Ii[i+1]- Ii[i];
3758     JJ      = J + Ii[i];
3759     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3760     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3761     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3762   }
3763 #endif
3764 
3765   for (i=0; i<m; i++) {
3766     nnz     = Ii[i+1]- Ii[i];
3767     JJ      = J + Ii[i];
3768     nnz_max = PetscMax(nnz_max,nnz);
3769     d       = 0;
3770     for (j=0; j<nnz; j++) {
3771       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3772     }
3773     d_nnz[i] = d;
3774     o_nnz[i] = nnz - d;
3775   }
3776   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3777   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3778 
3779   if (v) values = (PetscScalar*)v;
3780   else {
3781     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3782     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3783   }
3784 
3785   for (i=0; i<m; i++) {
3786     ii   = i + rstart;
3787     nnz  = Ii[i+1]- Ii[i];
3788     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3789   }
3790   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3791   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3792 
3793   if (!v) {
3794     ierr = PetscFree(values);CHKERRQ(ierr);
3795   }
3796   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3797   PetscFunctionReturn(0);
3798 }
3799 EXTERN_C_END
3800 
3801 #undef __FUNCT__
3802 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3803 /*@
3804    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3805    (the default parallel PETSc format).
3806 
3807    Collective on MPI_Comm
3808 
3809    Input Parameters:
3810 +  B - the matrix
3811 .  i - the indices into j for the start of each local row (starts with zero)
3812 .  j - the column indices for each local row (starts with zero)
3813 -  v - optional values in the matrix
3814 
3815    Level: developer
3816 
3817    Notes:
3818        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3819      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3820      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3821 
3822        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3823 
3824        The format used for the sparse matrix input is equivalent to a
3825     row-major ordering, i.e., for the following matrix the expected input data is
3826     as shown:
3827 
3828         1 0 0
3829         2 0 3     P0
3830        -------
3831         4 5 6     P1
3832 
3833      Process0 [P0]: rows_owned=[0,1]
3834         i =  {0,1,3}  [size = nrow+1  = 2+1]
3835         j =  {0,0,2}  [size = nz = 3]
3836         v =  {1,2,3}  [size = nz = 3]
3837 
3838      Process1 [P1]: rows_owned=[2]
3839         i =  {0,3}    [size = nrow+1  = 1+1]
3840         j =  {0,1,2}  [size = nz = 3]
3841         v =  {4,5,6}  [size = nz = 3]
3842 
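     A minimal usage sketch for process P0 of the example above, where i holds the row
   pointers, j the global column indices, and v the values; each process passes its own
   local CSR arrays, and error checking with CHKERRQ() is omitted:
.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
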
3843 .keywords: matrix, aij, compressed row, sparse, parallel
3844 
3845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3846           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3847 @*/
3848 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3849 {
3850   PetscErrorCode ierr;
3851 
3852   PetscFunctionBegin;
3853   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3859 /*@C
3860    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).  For good matrix assembly performance
3862    the user should preallocate the matrix storage by setting the parameters
3863    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3864    performance can be increased by more than a factor of 50.
3865 
3866    Collective on MPI_Comm
3867 
3868    Input Parameters:
3869 +  A - the matrix
3870 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3871            (same value is used for all local rows)
3872 .  d_nnz - array containing the number of nonzeros in the various rows of the
3873            DIAGONAL portion of the local submatrix (possibly different for each row)
3874            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3875            The size of this array is equal to the number of local rows, i.e 'm'.
3876            For matrices that will be factored, you must leave room for (and set)
3877            the diagonal entry even if it is zero.
3878 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3879            submatrix (same value is used for all local rows).
3880 -  o_nnz - array containing the number of nonzeros in the various rows of the
3881            OFF-DIAGONAL portion of the local submatrix (possibly different for
3882            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3883            structure. The size of this array is equal to the number
3884            of local rows, i.e 'm'.
3885 
3886    If the *_nnz parameter is given then the *_nz parameter is ignored
3887 
3888    The AIJ format (also called the Yale sparse matrix format or
3889    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3890    storage.  The stored row and column indices begin with zero.
3891    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3892 
3893    The parallel matrix is partitioned such that the first m0 rows belong to
3894    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3895    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3896 
3897    The DIAGONAL portion of the local submatrix of a processor can be defined
3898    as the submatrix which is obtained by extracting the part corresponding to
3899    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3900    first row that belongs to the processor, r2 is the last row belonging to
3901    this processor, and c1-c2 is the range of indices of the local part of a
3902    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3903    common case of a square matrix, the row and column ranges are the same and
3904    the DIAGONAL part is also square. The remaining portion of the local
3905    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3906 
3907    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3908 
3909    You can call MatGetInfo() to get information on how effective the preallocation was;
3910    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3911    You can also run with the option -info and look for messages with the string
3912    malloc in them to see if additional memory allocation was needed.
3913 
3914    Example usage:
3915 
3916    Consider the following 8x8 matrix with 34 non-zero values, that is
3917    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3918    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3919    as follows:
3920 
3921 .vb
3922             1  2  0  |  0  3  0  |  0  4
3923     Proc0   0  5  6  |  7  0  0  |  8  0
3924             9  0 10  | 11  0  0  | 12  0
3925     -------------------------------------
3926            13  0 14  | 15 16 17  |  0  0
3927     Proc1   0 18  0  | 19 20 21  |  0  0
3928             0  0  0  | 22 23  0  | 24  0
3929     -------------------------------------
3930     Proc2  25 26 27  |  0  0 28  | 29  0
3931            30  0  0  | 31 32 33  |  0 34
3932 .ve
3933 
3934    This can be represented as a collection of submatrices as:
3935 
3936 .vb
3937       A B C
3938       D E F
3939       G H I
3940 .ve
3941 
3942    Where the submatrices A,B,C are owned by proc0, D,E,F are
3943    owned by proc1, G,H,I are owned by proc2.
3944 
3945    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3946    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3947    The 'M','N' parameters are 8,8, and have the same values on all procs.
3948 
3949    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3950    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3951    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3952    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3953    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3954    matrix, and [DF] as another SeqAIJ matrix.
3955 
3956    When d_nz, o_nz parameters are specified, d_nz storage elements are
3957    allocated for every row of the local diagonal submatrix, and o_nz
3958    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3959    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3960    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3961    In this case, the values of d_nz,o_nz are:
3962 .vb
3963      proc0 : d_nz = 2, o_nz = 2
3964      proc1 : d_nz = 3, o_nz = 2
3965      proc2 : d_nz = 1, o_nz = 4
3966 .ve
3967    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3968    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3969    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3970    34 values.
3971 
3972    When d_nnz, o_nnz parameters are specified, the storage is specified
3973    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3974    In the above case the values for d_nnz,o_nnz are:
3975 .vb
3976      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3977      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3978      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3979 .ve
3980    Here the space allocated is the sum of all the above values, i.e. 34, and
3981    hence the preallocation is perfect.
3982 
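   As a rough usage sketch (the d_nnz/o_nnz values below match proc0 of the example
   above; each process passes its own local arrays, and error checking with CHKERRQ()
   is omitted):
.vb
     Mat      A;
     PetscInt m = 3;
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
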
3983    Level: intermediate
3984 
3985 .keywords: matrix, aij, compressed row, sparse, parallel
3986 
3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3988           MPIAIJ, MatGetInfo()
3989 @*/
3990 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3991 {
3992   PetscErrorCode ierr;
3993 
3994   PetscFunctionBegin;
3995   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3996   PetscValidType(B,1);
3997   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3998   PetscFunctionReturn(0);
3999 }
4000 
4001 #undef __FUNCT__
4002 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4003 /*@
4004      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4005          rows in standard CSR format.
4006 
4007    Collective on MPI_Comm
4008 
4009    Input Parameters:
4010 +  comm - MPI communicator
4011 .  m - number of local rows (Cannot be PETSC_DECIDE)
4012 .  n - This value should be the same as the local size used in creating the
4013        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4014        calculated if N is given) For square matrices n is almost always m.
4015 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4016 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4017 .   i - row indices
4018 .   j - column indices
4019 -   a - matrix values
4020 
4021    Output Parameter:
4022 .   mat - the matrix
4023 
4024    Level: intermediate
4025 
4026    Notes:
4027        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4028      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4029      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4030 
4031        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4032 
4033        The format used for the sparse matrix input is equivalent to a
4034     row-major ordering, i.e. for the following matrix, the input data expected is
4035     as shown:
4036 
4037         1 0 0
4038         2 0 3     P0
4039        -------
4040         4 5 6     P1
4041 
4042      Process0 [P0]: rows_owned=[0,1]
4043         i =  {0,1,3}  [size = nrow+1  = 2+1]
4044         j =  {0,0,2}  [size = nz = 3]
4045         v =  {1,2,3}  [size = nz = 3]
4046 
4047      Process1 [P1]: rows_owned=[2]
4048         i =  {0,3}    [size = nrow+1  = 1+1]
4049         j =  {0,1,2}  [size = nz = 3]
4050         v =  {4,5,6}  [size = nz = 3]
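
     With these local arrays in hand, each process can create the matrix in one call
     (a minimal sketch; nlocal is the number of locally owned rows and N the global
     number of columns, both assumed known to the caller):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,nlocal,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,v,&A);CHKERRQ(ierr);
.ve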
4051 
4052 .keywords: matrix, aij, compressed row, sparse, parallel
4053 
4054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4055           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4056 @*/
4057 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4058 {
4059   PetscErrorCode ierr;
4060 
4061   PetscFunctionBegin;
4062   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4063   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4064   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4065   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4066   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4067   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4068   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4069   PetscFunctionReturn(0);
4070 }
4071 
4072 #undef __FUNCT__
4073 #define __FUNCT__ "MatCreateAIJ"
4074 /*@C
4075    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4076    (the default parallel PETSc format).  For good matrix assembly performance
4077    the user should preallocate the matrix storage by setting the parameters
4078    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4079    performance can be increased by more than a factor of 50.
4080 
4081    Collective on MPI_Comm
4082 
4083    Input Parameters:
4084 +  comm - MPI communicator
4085 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4086            This value should be the same as the local size used in creating the
4087            y vector for the matrix-vector product y = Ax.
4088 .  n - This value should be the same as the local size used in creating the
4089        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4090        calculated if N is given) For square matrices n is almost always m.
4091 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4092 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4093 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4094            (same value is used for all local rows)
4095 .  d_nnz - array containing the number of nonzeros in the various rows of the
4096            DIAGONAL portion of the local submatrix (possibly different for each row)
4097            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4098            The size of this array is equal to the number of local rows, i.e 'm'.
4099 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4100            submatrix (same value is used for all local rows).
4101 -  o_nnz - array containing the number of nonzeros in the various rows of the
4102            OFF-DIAGONAL portion of the local submatrix (possibly different for
4103            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4104            structure. The size of this array is equal to the number
4105            of local rows, i.e 'm'.
4106 
4107    Output Parameter:
4108 .  A - the matrix
4109 
4110    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4111    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4112    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4113 
4114    Notes:
4115    If the *_nnz parameter is given then the *_nz parameter is ignored
4116 
4117    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4118    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4119    storage requirements for this matrix.
4120 
4121    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4122    processor then it must be used on all processors that share the object for
4123    that argument.
4124 
4125    The user MUST specify either the local or global matrix dimensions
4126    (possibly both).
4127 
4128    The parallel matrix is partitioned across processors such that the
4129    first m0 rows belong to process 0, the next m1 rows belong to
4130    process 1, the next m2 rows belong to process 2, etc., where
4131    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4132    values corresponding to an [m x N] submatrix.
4133 
4134    The columns are logically partitioned with the first n0 columns belonging
4135    to the 0th partition, the next n1 columns belonging to the next
4136    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4137 
4138    The DIAGONAL portion of the local submatrix on any given processor
4139    is the submatrix corresponding to the rows and columns m,n
4140    owned by that processor, i.e. the diagonal submatrix on
4141    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4142    etc. The remaining [m x (N-n)] portion of the local submatrix
4143    constitutes the OFF-DIAGONAL portion. The example below better
4144    illustrates this concept.
4145 
4146    For a square global matrix we define each processor's diagonal portion
4147    to be its local rows and the corresponding columns (a square submatrix);
4148    each processor's off-diagonal portion encompasses the remainder of the
4149    local matrix (a rectangular submatrix).
4150 
4151    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4152 
4153    When calling this routine with a single process communicator, a matrix of
4154    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4155    type of communicator, use the construction mechanism:
4156      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4157 
4158    By default, this format uses inodes (identical nodes) when possible.
4159    We search for consecutive rows with the same nonzero structure, thereby
4160    reusing matrix information to achieve increased efficiency.
4161 
4162    Options Database Keys:
4163 +  -mat_no_inode  - Do not use inodes
4164 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4165 -  -mat_aij_oneindex - Internally use indexing starting at 1
4166         rather than 0.  Note that when calling MatSetValues(),
4167         the user still MUST index entries starting at 0!
4168 
4169 
4170    Example usage:
4171 
4172    Consider the following 8x8 matrix with 34 non-zero values, which is
4173    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4174    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4175    as follows:
4176 
4177 .vb
4178             1  2  0  |  0  3  0  |  0  4
4179     Proc0   0  5  6  |  7  0  0  |  8  0
4180             9  0 10  | 11  0  0  | 12  0
4181     -------------------------------------
4182            13  0 14  | 15 16 17  |  0  0
4183     Proc1   0 18  0  | 19 20 21  |  0  0
4184             0  0  0  | 22 23  0  | 24  0
4185     -------------------------------------
4186     Proc2  25 26 27  |  0  0 28  | 29  0
4187            30  0  0  | 31 32 33  |  0 34
4188 .ve
4189 
4190    This can be represented as a collection of submatrices as:
4191 
4192 .vb
4193       A B C
4194       D E F
4195       G H I
4196 .ve
4197 
4198    The submatrices A,B,C are owned by proc0, D,E,F are
4199    owned by proc1, and G,H,I are owned by proc2.
4200 
4201    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4202    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4203    The 'M','N' parameters are 8,8, and have the same values on all procs.
4204 
4205    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4206    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4207    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4208    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4209    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4210    matrix, and [DF] as another SeqAIJ matrix.
4211 
4212    When d_nz, o_nz parameters are specified, d_nz storage elements are
4213    allocated for every row of the local diagonal submatrix, and o_nz
4214    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4215    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4216    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4217    In this case, the values of d_nz,o_nz are:
4218 .vb
4219      proc0 : d_nz = 2, o_nz = 2
4220      proc1 : d_nz = 3, o_nz = 2
4221      proc2 : d_nz = 1, o_nz = 4
4222 .ve
4223    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4224    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4225    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4226    34 values.
4227 
4228    When d_nnz, o_nnz parameters are specified, the storage is specified
4229    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4230    In the above case the values for d_nnz,o_nnz are:
4231 .vb
4232      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4233      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4234      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4235 .ve
4236    Here the space allocated is the sum of all the above values, i.e. 34, and
4237    hence the pre-allocation is perfect.
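
   For the example above, the matrix could be created in a single call on each process
   (a minimal sketch; m, n, d_nnz and o_nnz hold that process's local sizes and counts):
.vb
     Mat A;
     ierr = MatCreateAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve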
4238 
4239    Level: intermediate
4240 
4241 .keywords: matrix, aij, compressed row, sparse, parallel
4242 
4243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4244           MPIAIJ, MatCreateMPIAIJWithArrays()
4245 @*/
4246 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4247 {
4248   PetscErrorCode ierr;
4249   PetscMPIInt    size;
4250 
4251   PetscFunctionBegin;
4252   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4253   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4255   if (size > 1) {
4256     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4257     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4258   } else {
4259     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4260     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4261   }
4262   PetscFunctionReturn(0);
4263 }
4264 
4265 #undef __FUNCT__
4266 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
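/*
   MatMPIAIJGetSeqAIJ - returns the DIAGONAL (Ad) and OFF-DIAGONAL (Ao) SeqAIJ parts of an
   MPIAIJ matrix, along with colmap, which gives the global column index of each local
   column of Ao. The returned matrices are references to the internal data, not copies.
*/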
4267 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4268 {
4269   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4270 
4271   PetscFunctionBegin;
4272   *Ad     = a->A;
4273   *Ao     = a->B;
4274   *colmap = a->garray;
4275   PetscFunctionReturn(0);
4276 }
4277 
4278 #undef __FUNCT__
4279 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4280 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4281 {
4282   PetscErrorCode ierr;
4283   PetscInt       i;
4284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4285 
4286   PetscFunctionBegin;
4287   if (coloring->ctype == IS_COLORING_GLOBAL) {
4288     ISColoringValue *allcolors,*colors;
4289     ISColoring      ocoloring;
4290 
4291     /* set coloring for diagonal portion */
4292     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4293 
4294     /* set coloring for off-diagonal portion */
4295     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4296     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4297     for (i=0; i<a->B->cmap->n; i++) {
4298       colors[i] = allcolors[a->garray[i]];
4299     }
4300     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4301     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4302     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4303     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4304   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4305     ISColoringValue *colors;
4306     PetscInt        *larray;
4307     ISColoring      ocoloring;
4308 
4309     /* set coloring for diagonal portion */
4310     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4311     for (i=0; i<a->A->cmap->n; i++) {
4312       larray[i] = i + A->cmap->rstart;
4313     }
4314     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4315     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4316     for (i=0; i<a->A->cmap->n; i++) {
4317       colors[i] = coloring->colors[larray[i]];
4318     }
4319     ierr = PetscFree(larray);CHKERRQ(ierr);
4320     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4321     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4322     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4323 
4324     /* set coloring for off-diagonal portion */
4325     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4326     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4327     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4328     for (i=0; i<a->B->cmap->n; i++) {
4329       colors[i] = coloring->colors[larray[i]];
4330     }
4331     ierr = PetscFree(larray);CHKERRQ(ierr);
4332     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4333     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4334     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4335   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4336 
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 #if defined(PETSC_HAVE_ADIC)
4341 #undef __FUNCT__
4342 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4343 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4344 {
4345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4346   PetscErrorCode ierr;
4347 
4348   PetscFunctionBegin;
4349   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4350   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353 #endif
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4357 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4358 {
4359   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4360   PetscErrorCode ierr;
4361 
4362   PetscFunctionBegin;
4363   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4364   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4370 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4371 {
4372   PetscErrorCode ierr;
4373   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4374   PetscInt       *indx;
4375 
4376   PetscFunctionBegin;
4377   /* This routine will ONLY return MPIAIJ type matrix */
4378   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4379   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4380   if (n == PETSC_DECIDE){
4381     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4382   }
4383   /* Check sum(n) = N */
4384   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4385   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4386 
4387   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4388   rstart -= m;
4389 
4390   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4391   for (i=0;i<m;i++) {
4392     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4393     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4394     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4395   }
4396 
4397   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4398   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4399   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4400   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4401   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4402   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4408 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4409 {
4410   PetscErrorCode ierr;
4411   PetscInt       m,N,i,rstart,nnz,Ii;
4412   PetscInt       *indx;
4413   PetscScalar    *values;
4414 
4415   PetscFunctionBegin;
4416   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4417   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4418   for (i=0;i<m;i++) {
4419     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4420     Ii    = i + rstart;
4421     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4422     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4423   }
4424   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4425   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #undef __FUNCT__
4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4431 /*@
4432       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4433                  matrices from each processor
4434 
4435     Collective on MPI_Comm
4436 
4437    Input Parameters:
4438 +    comm - the communicators the parallel matrix will live on
4439 .    inmat - the input sequential matrices
4440 .    n - number of local columns (or PETSC_DECIDE)
4441 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4442 
4443    Output Parameter:
4444 .    outmat - the parallel matrix generated
4445 
4446     Level: advanced
4447 
4448    Notes: The number of columns of the matrix in EACH processor MUST be the same.
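
   A minimal usage sketch (assuming each process holds a sequential matrix Aloc, all with
   the same number of columns):
.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve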
4449 
4450 @*/
4451 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4452 {
4453   PetscErrorCode ierr;
4454 
4455   PetscFunctionBegin;
4456   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4457   if (scall == MAT_INITIAL_MATRIX){
4458     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4459   }
4460   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4461   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4462   PetscFunctionReturn(0);
4463 }
4464 
4465 #undef __FUNCT__
4466 #define __FUNCT__ "MatFileSplit"
4467 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4468 {
4469   PetscErrorCode    ierr;
4470   PetscMPIInt       rank;
4471   PetscInt          m,N,i,rstart,nnz;
4472   size_t            len;
4473   const PetscInt    *indx;
4474   PetscViewer       out;
4475   char              *name;
4476   Mat               B;
4477   const PetscScalar *values;
4478 
4479   PetscFunctionBegin;
4480   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4481   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4482   /* Should this be the type of the diagonal block of A? */
4483   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4484   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4485   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4486   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4487   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4488   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4489   for (i=0;i<m;i++) {
4490     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4491     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4492     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4493   }
4494   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4495   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4496 
4497   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4498   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4499   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4500   sprintf(name,"%s.%d",outfile,rank);
4501   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4502   ierr = PetscFree(name);
4503   ierr = MatView(B,out);CHKERRQ(ierr);
4504   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4505   ierr = MatDestroy(&B);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4510 #undef __FUNCT__
4511 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4512 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4513 {
4514   PetscErrorCode       ierr;
4515   Mat_Merge_SeqsToMPI  *merge;
4516   PetscContainer       container;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4520   if (container) {
4521     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4522     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4534     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4535     ierr = PetscFree(merge);CHKERRQ(ierr);
4536     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4537   }
4538   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4539   PetscFunctionReturn(0);
4540 }
4541 
4542 #include <../src/mat/utils/freespace.h>
4543 #include <petscbt.h>
4544 
4545 #undef __FUNCT__
4546 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4547 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4548 {
4549   PetscErrorCode       ierr;
4550   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4551   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4552   PetscMPIInt          size,rank,taga,*len_s;
4553   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4554   PetscInt             proc,m;
4555   PetscInt             **buf_ri,**buf_rj;
4556   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4557   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4558   MPI_Request          *s_waits,*r_waits;
4559   MPI_Status           *status;
4560   MatScalar            *aa=a->a;
4561   MatScalar            **abuf_r,*ba_i;
4562   Mat_Merge_SeqsToMPI  *merge;
4563   PetscContainer       container;
4564 
4565   PetscFunctionBegin;
4566   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4567 
4568   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4569   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4570 
4571   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4572   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4573 
4574   bi     = merge->bi;
4575   bj     = merge->bj;
4576   buf_ri = merge->buf_ri;
4577   buf_rj = merge->buf_rj;
4578 
4579   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4580   owners = merge->rowmap->range;
4581   len_s  = merge->len_s;
4582 
4583   /* send and recv matrix values */
4584   /*-----------------------------*/
4585   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4586   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4587 
4588   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4589   for (proc=0,k=0; proc<size; proc++){
4590     if (!len_s[proc]) continue;
4591     i = owners[proc];
4592     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4593     k++;
4594   }
4595 
4596   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4597   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4598   ierr = PetscFree(status);CHKERRQ(ierr);
4599 
4600   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4601   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4602 
4603   /* insert mat values of mpimat */
4604   /*----------------------------*/
4605   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4606   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4607 
4608   for (k=0; k<merge->nrecv; k++){
4609     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4610     nrows = *(buf_ri_k[k]);
4611     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4612     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4613   }
4614 
4615   /* set values of ba */
4616   m = merge->rowmap->n;
4617   for (i=0; i<m; i++) {
4618     arow = owners[rank] + i;
4619     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4620     bnzi = bi[i+1] - bi[i];
4621     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4622 
4623     /* add local non-zero vals of this proc's seqmat into ba */
4624     anzi = ai[arow+1] - ai[arow];
4625     aj   = a->j + ai[arow];
4626     aa   = a->a + ai[arow];
4627     nextaj = 0;
4628     for (j=0; nextaj<anzi; j++){
4629       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4630         ba_i[j] += aa[nextaj++];
4631       }
4632     }
4633 
4634     /* add received vals into ba */
4635     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4636       /* i-th row */
4637       if (i == *nextrow[k]) {
4638         anzi = *(nextai[k]+1) - *nextai[k];
4639         aj   = buf_rj[k] + *(nextai[k]);
4640         aa   = abuf_r[k] + *(nextai[k]);
4641         nextaj = 0;
4642         for (j=0; nextaj<anzi; j++){
4643           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4644             ba_i[j] += aa[nextaj++];
4645           }
4646         }
4647         nextrow[k]++; nextai[k]++;
4648       }
4649     }
4650     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4651   }
4652   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4654 
4655   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4656   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4657   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4658   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4659   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4664 
4665 #undef __FUNCT__
4666 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4667 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4668 {
4669   PetscErrorCode       ierr;
4670   Mat                  B_mpi;
4671   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4672   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4673   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4674   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4675   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4676   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4677   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4678   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4679   MPI_Status           *status;
4680   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4681   PetscBT              lnkbt;
4682   Mat_Merge_SeqsToMPI  *merge;
4683   PetscContainer       container;
4684 
4685   PetscFunctionBegin;
4686   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4687 
4688   /* make sure it is a PETSc comm */
4689   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4692 
4693   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4694   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4695 
4696   /* determine row ownership */
4697   /*---------------------------------------------------------*/
4698   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4699   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4700   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4701   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4702   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4703   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4704   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4705 
4706   m      = merge->rowmap->n;
4707   M      = merge->rowmap->N;
4708   owners = merge->rowmap->range;
4709 
4710   /* determine the number of messages to send, their lengths */
4711   /*---------------------------------------------------------*/
4712   len_s  = merge->len_s;
4713 
4714   len = 0;  /* length of buf_si[] */
4715   merge->nsend = 0;
4716   for (proc=0; proc<size; proc++){
4717     len_si[proc] = 0;
4718     if (proc == rank){
4719       len_s[proc] = 0;
4720     } else {
4721       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4722       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4723     }
4724     if (len_s[proc]) {
4725       merge->nsend++;
4726       nrows = 0;
4727       for (i=owners[proc]; i<owners[proc+1]; i++){
4728         if (ai[i+1] > ai[i]) nrows++;
4729       }
4730       len_si[proc] = 2*(nrows+1);
4731       len += len_si[proc];
4732     }
4733   }
4734 
4735   /* determine the number and length of messages to receive for ij-structure */
4736   /*-------------------------------------------------------------------------*/
4737   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4738   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4739 
4740   /* post the Irecv of j-structure */
4741   /*-------------------------------*/
4742   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4743   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4744 
4745   /* post the Isend of j-structure */
4746   /*--------------------------------*/
4747   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4748 
4749   for (proc=0, k=0; proc<size; proc++){
4750     if (!len_s[proc]) continue;
4751     i = owners[proc];
4752     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4753     k++;
4754   }
4755 
4756   /* receives and sends of j-structure are complete */
4757   /*------------------------------------------------*/
4758   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4759   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4760 
4761   /* send and recv i-structure */
4762   /*---------------------------*/
4763   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4764   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4765 
4766   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4767   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4768   for (proc=0,k=0; proc<size; proc++){
4769     if (!len_s[proc]) continue;
4770     /* form outgoing message for i-structure:
4771          buf_si[0]:                 nrows to be sent
4772                [1:nrows]:           row index (global)
4773                [nrows+1:2*nrows+1]: i-structure index
4774     */
4775     /*-------------------------------------------*/
4776     nrows = len_si[proc]/2 - 1;
4777     buf_si_i    = buf_si + nrows+1;
4778     buf_si[0]   = nrows;
4779     buf_si_i[0] = 0;
4780     nrows = 0;
4781     for (i=owners[proc]; i<owners[proc+1]; i++){
4782       anzi = ai[i+1] - ai[i];
4783       if (anzi) {
4784         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4785         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4786         nrows++;
4787       }
4788     }
4789     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4790     k++;
4791     buf_si += len_si[proc];
4792   }
4793 
4794   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4795   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4796 
4797   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4798   for (i=0; i<merge->nrecv; i++){
4799     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4800   }
4801 
4802   ierr = PetscFree(len_si);CHKERRQ(ierr);
4803   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4804   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4805   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4806   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4807   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4808   ierr = PetscFree(status);CHKERRQ(ierr);
4809 
4810   /* compute a local seq matrix in each processor */
4811   /*----------------------------------------------*/
4812   /* allocate bi array and free space for accumulating nonzero column info */
4813   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4814   bi[0] = 0;
4815 
4816   /* create and initialize a linked list */
4817   nlnk = N+1;
4818   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4819 
4820   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4821   len = 0;
4822   len  = ai[owners[rank+1]] - ai[owners[rank]];
4823   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4824   current_space = free_space;
4825 
4826   /* determine symbolic info for each local row */
4827   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4828 
4829   for (k=0; k<merge->nrecv; k++){
4830     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4831     nrows = *buf_ri_k[k];
4832     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4833     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4834   }
4835 
4836   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4837   len = 0;
4838   for (i=0;i<m;i++) {
4839     bnzi   = 0;
4840     /* add local non-zero cols of this proc's seqmat into lnk */
4841     arow   = owners[rank] + i;
4842     anzi   = ai[arow+1] - ai[arow];
4843     aj     = a->j + ai[arow];
4844     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4845     bnzi += nlnk;
4846     /* add received col data into lnk */
4847     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4848       if (i == *nextrow[k]) { /* i-th row */
4849         anzi = *(nextai[k]+1) - *nextai[k];
4850         aj   = buf_rj[k] + *nextai[k];
4851         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4852         bnzi += nlnk;
4853         nextrow[k]++; nextai[k]++;
4854       }
4855     }
4856     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4857 
4858     /* if free space is not available, make more free space */
4859     if (current_space->local_remaining<bnzi) {
4860       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4861       nspacedouble++;
4862     }
4863     /* copy data into free space, then initialize lnk */
4864     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4865     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4866 
4867     current_space->array           += bnzi;
4868     current_space->local_used      += bnzi;
4869     current_space->local_remaining -= bnzi;
4870 
4871     bi[i+1] = bi[i] + bnzi;
4872   }
4873 
4874   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4875 
4876   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4877   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4878   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4879 
4880   /* create symbolic parallel matrix B_mpi */
4881   /*---------------------------------------*/
4882   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4883   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4884   if (n==PETSC_DECIDE) {
4885     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4886   } else {
4887     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4888   }
4889   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4890   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4891   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4892   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4893   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4894 
4895   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4896   B_mpi->assembled     = PETSC_FALSE;
4897   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4898   merge->bi            = bi;
4899   merge->bj            = bj;
4900   merge->buf_ri        = buf_ri;
4901   merge->buf_rj        = buf_rj;
4902   merge->coi           = PETSC_NULL;
4903   merge->coj           = PETSC_NULL;
4904   merge->owners_co     = PETSC_NULL;
4905 
4906   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4907 
4908   /* attach the supporting struct to B_mpi for reuse */
4909   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4910   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4911   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4912   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4913   *mpimat = B_mpi;
4914 
4915   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4921 /*@C
4922       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together sequential
4923                  matrices from each processor
4924 
4925     Collective on MPI_Comm
4926 
4927    Input Parameters:
4928 +    comm - the communicators the parallel matrix will live on
4929 .    seqmat - the input sequential matrices
4930 .    m - number of local rows (or PETSC_DECIDE)
4931 .    n - number of local columns (or PETSC_DECIDE)
4932 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4933 
4934    Output Parameter:
4935 .    mpimat - the parallel matrix generated
4936 
4937     Level: advanced
4938 
4939    Notes:
4940      The dimensions of the sequential matrix in each processor MUST be the same.
4941      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4942      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
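
   A minimal usage sketch (assuming each process holds a sequential matrix seqmat of
   identical dimensions, e.g. a locally computed contribution to a global sum):
.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve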
4943 @*/
4944 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4945 {
4946   PetscErrorCode   ierr;
4947   PetscMPIInt     size;
4948 
4949   PetscFunctionBegin;
4950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4951   if (size == 1){
4952      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4953      if (scall == MAT_INITIAL_MATRIX){
4954        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4955      } else {
4956        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4957      }
4958      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959      PetscFunctionReturn(0);
4960   }
4961   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4962   if (scall == MAT_INITIAL_MATRIX){
4963     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4964   }
4965   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4966   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967   PetscFunctionReturn(0);
4968 }
4969 
4970 #undef __FUNCT__
4971 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4972 /*@
4973      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4974           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4975           with MatGetSize().
4976 
4977     Not Collective
4978 
4979    Input Parameters:
4980 +    A - the matrix
4981 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4982 
4983    Output Parameter:
4984 .    A_loc - the local sequential matrix generated
4985 
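   A usage sketch (a first call with MAT_INITIAL_MATRIX creates A_loc; a later call with
   MAT_REUSE_MATRIX refills it with the current values of A):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc, possibly update A ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
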
4986     Level: developer
4987 
4988 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4989 
4990 @*/
4991 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4992 {
4993   PetscErrorCode  ierr;
4994   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4995   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4996   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4997   MatScalar       *aa=a->a,*ba=b->a,*cam;
4998   PetscScalar     *ca;
4999   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5000   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5001   PetscBool       match;
5002 
5003   PetscFunctionBegin;
5004   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5005   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5006   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5007   if (scall == MAT_INITIAL_MATRIX){
5008     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5009     ci[0] = 0;
5010     for (i=0; i<am; i++){
5011       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5012     }
5013     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5014     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5015     k = 0;
5016     for (i=0; i<am; i++) {
5017       ncols_o = bi[i+1] - bi[i];
5018       ncols_d = ai[i+1] - ai[i];
5019       /* off-diagonal portion of A */
5020       for (jo=0; jo<ncols_o; jo++) {
5021         col = cmap[*bj];
5022         if (col >= cstart) break;
5023         cj[k]   = col; bj++;
5024         ca[k++] = *ba++;
5025       }
5026       /* diagonal portion of A */
5027       for (j=0; j<ncols_d; j++) {
5028         cj[k]   = cstart + *aj++;
5029         ca[k++] = *aa++;
5030       }
5031       /* off-diagonal portion of A */
5032       for (j=jo; j<ncols_o; j++) {
5033         cj[k]   = cmap[*bj++];
5034         ca[k++] = *ba++;
5035       }
5036     }
5037     /* put together the new matrix */
5038     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5039     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5040     /* Since these are PETSc arrays, change flags to free them as necessary. */
5041     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5042     mat->free_a  = PETSC_TRUE;
5043     mat->free_ij = PETSC_TRUE;
5044     mat->nonew   = 0;
5045   } else if (scall == MAT_REUSE_MATRIX){
5046     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5047     ci = mat->i; cj = mat->j; cam = mat->a;
5048     for (i=0; i<am; i++) {
5049       /* off-diagonal portion of A */
5050       ncols_o = bi[i+1] - bi[i];
5051       for (jo=0; jo<ncols_o; jo++) {
5052         col = cmap[*bj];
5053         if (col >= cstart) break;
5054         *cam++ = *ba++; bj++;
5055       }
5056       /* diagonal portion of A */
5057       ncols_d = ai[i+1] - ai[i];
5058       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5059       /* off-diagonal portion of A */
5060       for (j=jo; j<ncols_o; j++) {
5061         *cam++ = *ba++; bj++;
5062       }
5063     }
5064   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5065   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5066   PetscFunctionReturn(0);
5067 }
5068 
5069 #undef __FUNCT__
5070 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5071 /*@C
5072      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5073 
5074     Not Collective
5075 
5076    Input Parameters:
5077 +    A - the matrix
5078 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5079 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5080 
5081    Output Parameter:
5082 .    A_loc - the local sequential matrix generated
5083 
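   A usage sketch (passing PETSC_NULL for row and col selects the default row and column
   sets described above):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
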
5084     Level: developer
5085 
5086 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5087 
5088 @*/
5089 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5090 {
5091   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5092   PetscErrorCode    ierr;
5093   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5094   IS                isrowa,iscola;
5095   Mat               *aloc;
5096   PetscBool       match;
5097 
5098   PetscFunctionBegin;
5099   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5100   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5101   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5102   if (!row){
5103     start = A->rmap->rstart; end = A->rmap->rend;
5104     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5105   } else {
5106     isrowa = *row;
5107   }
5108   if (!col){
5109     start = A->cmap->rstart;
5110     cmap  = a->garray;
5111     nzA   = a->A->cmap->n;
5112     nzB   = a->B->cmap->n;
5113     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5114     ncols = 0;
5115     for (i=0; i<nzB; i++) {
5116       if (cmap[i] < start) idx[ncols++] = cmap[i];
5117       else break;
5118     }
5119     imark = i;
5120     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5121     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5122     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5123   } else {
5124     iscola = *col;
5125   }
5126   if (scall != MAT_INITIAL_MATRIX){
5127     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5128     aloc[0] = *A_loc;
5129   }
5130   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5131   *A_loc = aloc[0];
5132   ierr = PetscFree(aloc);CHKERRQ(ierr);
5133   if (!row){
5134     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5135   }
5136   if (!col){
5137     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5138   }
5139   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5140   PetscFunctionReturn(0);
5141 }
5142 
5143 #undef __FUNCT__
5144 #define __FUNCT__ "MatGetBrowsOfAcols"
5145 /*@C
5146     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5147 
5148     Collective on Mat
5149 
5150    Input Parameters:
5151 +    A,B - the matrices in mpiaij format
5152 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5153 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5154 
5155    Output Parameter:
5156 +    rowb, colb - index sets of rows and columns of B to extract
5157 -    B_seq - the sequential matrix generated
5158 
5159     Level: developer
5160 
5161 @*/
5162 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5163 {
5164   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5165   PetscErrorCode    ierr;
5166   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5167   IS                isrowb,iscolb;
5168   Mat               *bseq=PETSC_NULL;
5169 
5170   PetscFunctionBegin;
5171   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5172     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5173   }
5174   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5175 
5176   if (scall == MAT_INITIAL_MATRIX){
5177     start = A->cmap->rstart;
5178     cmap  = a->garray;
5179     nzA   = a->A->cmap->n;
5180     nzB   = a->B->cmap->n;
5181     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5182     ncols = 0;
5183     for (i=0; i<nzB; i++) {  /* row < local row index */
5184       if (cmap[i] < start) idx[ncols++] = cmap[i];
5185       else break;
5186     }
5187     imark = i;
5188     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5189     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5190     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5191     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5192   } else {
5193     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5194     isrowb = *rowb; iscolb = *colb;
5195     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5196     bseq[0] = *B_seq;
5197   }
5198   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5199   *B_seq = bseq[0];
5200   ierr = PetscFree(bseq);CHKERRQ(ierr);
5201   if (!rowb){
5202     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5203   } else {
5204     *rowb = isrowb;
5205   }
5206   if (!colb){
5207     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5208   } else {
5209     *colb = iscolb;
5210   }
5211   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5212   PetscFunctionReturn(0);
5213 }
5214 
5215 #undef __FUNCT__
5216 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5217 /*
5218     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5219     of the OFF-DIAGONAL portion of local A
5220 
5221     Collective on Mat
5222 
5223    Input Parameters:
5224 +    A,B - the matrices in mpiaij format
5225 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5226 
5227    Output Parameter:
5228 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5229 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5230 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5231 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5232 
5233     Level: developer
5234 
5235 */
5236 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5237 {
5238   VecScatter_MPI_General *gen_to,*gen_from;
5239   PetscErrorCode         ierr;
5240   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5241   Mat_SeqAIJ             *b_oth;
5242   VecScatter             ctx=a->Mvctx;
5243   MPI_Comm               comm=((PetscObject)ctx)->comm;
5244   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5245   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5246   PetscScalar            *rvalues,*svalues;
5247   MatScalar              *b_otha,*bufa,*bufA;
5248   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5249   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5250   MPI_Status             *sstatus,rstatus;
5251   PetscMPIInt            jj;
5252   PetscInt               *cols,sbs,rbs;
5253   PetscScalar            *vals;
5254 
5255   PetscFunctionBegin;
5256   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5257     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5258   }
5259   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5260   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5261 
5262   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5263   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5264   rvalues  = gen_from->values; /* holds the length of receiving row */
5265   svalues  = gen_to->values;   /* holds the length of sending row */
5266   nrecvs   = gen_from->n;
5267   nsends   = gen_to->n;
5268 
5269   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5270   srow     = gen_to->indices;   /* local row index to be sent */
5271   sstarts  = gen_to->starts;
5272   sprocs   = gen_to->procs;
5273   sstatus  = gen_to->sstatus;
5274   sbs      = gen_to->bs;
5275   rstarts  = gen_from->starts;
5276   rprocs   = gen_from->procs;
5277   rbs      = gen_from->bs;
5278 
5279   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5280   if (scall == MAT_INITIAL_MATRIX){
5281     /* i-array */
5282     /*---------*/
5283     /*  post receives */
5284     for (i=0; i<nrecvs; i++){
5285       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5286       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5287       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5288     }
5289 
5290     /* pack the outgoing message */
5291     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5292     sstartsj[0] = 0;  rstartsj[0] = 0;
5293     len = 0; /* total length of j or a array to be sent */
5294     k = 0;
5295     for (i=0; i<nsends; i++){
5296       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5297       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5298       for (j=0; j<nrows; j++) {
5299         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5300         for (l=0; l<sbs; l++){
5301           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5302           rowlen[j*sbs+l] = ncols;
5303           len += ncols;
5304           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5305         }
5306         k++;
5307       }
5308       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5309       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5310     }
5311     /* wait until recvs and sends of the i-array are completed */
5312     i = nrecvs;
5313     while (i--) {
5314       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5315     }
5316     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5317 
5318     /* allocate buffers for sending j and a arrays */
5319     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5320     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5321 
5322     /* create i-array of B_oth */
5323     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5324     b_othi[0] = 0;
5325     len = 0; /* total length of j or a array to be received */
5326     k = 0;
5327     for (i=0; i<nrecvs; i++){
5328       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5329       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5330       for (j=0; j<nrows; j++) {
5331         b_othi[k+1] = b_othi[k] + rowlen[j];
5332         len += rowlen[j]; k++;
5333       }
5334       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5335     }
5336 
5337     /* allocate space for j and a arrays of B_oth */
5338     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5339     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5340 
5341     /* j-array */
5342     /*---------*/
5343     /*  post receives of j-array */
5344     for (i=0; i<nrecvs; i++){
5345       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5346       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5347     }
5348 
5349     /* pack the outgoing message j-array */
5350     k = 0;
5351     for (i=0; i<nsends; i++){
5352       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5353       bufJ = bufj+sstartsj[i];
5354       for (j=0; j<nrows; j++) {
5355         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5356         for (ll=0; ll<sbs; ll++){
5357           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5358           for (l=0; l<ncols; l++){
5359             *bufJ++ = cols[l];
5360           }
5361           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5362         }
5363       }
5364       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5365     }
5366 
5367     /* wait until recvs and sends of the j-array are completed */
5368     i = nrecvs;
5369     while (i--) {
5370       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5371     }
5372     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5373   } else if (scall == MAT_REUSE_MATRIX){
5374     sstartsj = *startsj_s;
5375     rstartsj = *startsj_r;
5376     bufa     = *bufa_ptr;
5377     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5378     b_otha   = b_oth->a;
5379   } else {
5380     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5381   }
5382 
5383   /* a-array */
5384   /*---------*/
5385   /*  post receives of a-array */
5386   for (i=0; i<nrecvs; i++){
5387     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5388     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5389   }
5390 
5391   /* pack the outgoing message a-array */
5392   k = 0;
5393   for (i=0; i<nsends; i++){
5394     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5395     bufA = bufa+sstartsj[i];
5396     for (j=0; j<nrows; j++) {
5397       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5398       for (ll=0; ll<sbs; ll++){
5399         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5400         for (l=0; l<ncols; l++){
5401           *bufA++ = vals[l];
5402         }
5403         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5404       }
5405     }
5406     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5407   }
5408   /* wait until recvs and sends of the a-array are completed */
5409   i = nrecvs;
5410   while (i--) {
5411     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5412   }
5413   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5414   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5415 
5416   if (scall == MAT_INITIAL_MATRIX){
5417     /* put together the new matrix */
5418     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5419 
5420     /* MatCreateSeqAIJWithArrays() flags the matrix so that PETSc does not free the user's arrays. */
5421     /* Since these arrays were allocated by PETSc above, change the flags so they are freed as needed. */
5422     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5423     b_oth->free_a  = PETSC_TRUE;
5424     b_oth->free_ij = PETSC_TRUE;
5425     b_oth->nonew   = 0;
5426 
5427     ierr = PetscFree(bufj);CHKERRQ(ierr);
5428     if (!startsj_s || !bufa_ptr){
5429       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5430       ierr = PetscFree(bufa);CHKERRQ(ierr);
5431     } else {
5432       *startsj_s = sstartsj;
5433       *startsj_r = rstartsj;
5434       *bufa_ptr  = bufa;
5435     }
5436   }
5437   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5438   PetscFunctionReturn(0);
5439 }
5440 
5441 #undef __FUNCT__
5442 #define __FUNCT__ "MatGetCommunicationStructs"
5443 /*@C
5444   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5445 
5446   Not Collective
5447 
5448   Input Parameter:
5449 . A - The matrix in mpiaij format
5450 
5451   Output Parameters:
5452 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5453 . colmap - A map from global column index to local index into lvec
5454 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5455 
5456   Level: developer
5457 
5458 @*/
5459 #if defined (PETSC_USE_CTABLE)
5460 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5461 #else
5462 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5463 #endif
5464 {
5465   Mat_MPIAIJ *a;
5466 
5467   PetscFunctionBegin;
5468   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5469   PetscValidPointer(lvec, 2);
5470   PetscValidPointer(colmap, 3);
5471   PetscValidPointer(multScatter, 4);
5472   a = (Mat_MPIAIJ *) A->data;
5473   if (lvec) *lvec = a->lvec;
5474   if (colmap) *colmap = a->colmap;
5475   if (multScatter) *multScatter = a->Mvctx;
5476   PetscFunctionReturn(0);
5477 }
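/*
   Editorial usage sketch (not part of the original source): how the communication structures of an
   assembled MATMPIAIJ matrix A can be examined; nghost and the other names are hypothetical.

     Vec        lvec;
     VecScatter Mvctx;
     PetscInt   nghost;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecGetLocalSize(lvec,&nghost);CHKERRQ(ierr);
        (nghost is the number of off-process columns needed locally)

   Note that lvec, colmap and Mvctx remain owned by the matrix and must not be destroyed by the caller.
*/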
5478 
5479 EXTERN_C_BEGIN
5480 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5481 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5482 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5483 EXTERN_C_END
5484 
5485 #undef __FUNCT__
5486 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5487 /*
5488     Computes C = A*B as (B'*A')' since computing A*B directly with a dense A and an AIJ B is untenable
5489 
5490                n                       p                          p
5491         (              )       (              )         (                  )
5492       m (      A       )  *  n (       B      )   =   m (         C        )
5493         (              )       (              )         (                  )
5494 
5495 */
5496 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5497 {
5498   PetscErrorCode     ierr;
5499   Mat                At,Bt,Ct;
5500 
5501   PetscFunctionBegin;
5502   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5503   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5504   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5505   ierr = MatDestroy(&At);CHKERRQ(ierr);
5506   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5507   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5508   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5509   PetscFunctionReturn(0);
5510 }
5511 
5512 #undef __FUNCT__
5513 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5514 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5515 {
5516   PetscErrorCode ierr;
5517   PetscInt       m=A->rmap->n,n=B->cmap->n;
5518   Mat            Cmat;
5519 
5520   PetscFunctionBegin;
5521   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5522   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5523   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5524   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5525   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5526   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5527   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5528   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5529   *C   = Cmat;
5530   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5531   PetscFunctionReturn(0);
5532 }
5533 
5534 /* ----------------------------------------------------------------*/
5535 #undef __FUNCT__
5536 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5537 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5538 {
5539   PetscErrorCode ierr;
5540 
5541   PetscFunctionBegin;
5542   if (scall == MAT_INITIAL_MATRIX){
5543     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5544   }
5545   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5546   PetscFunctionReturn(0);
5547 }
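/*
   Editorial usage sketch (not part of the original source): the dense*AIJ product above is reached
   through the public interface; A_dense (MATMPIDENSE) and B_aij (MATMPIAIJ) are hypothetical,
   previously assembled matrices.

     Mat C;
     ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   MatMatMult() finds MatMatMult_MPIDense_MPIAIJ() through the "MatMatMult_mpidense_mpiaij_C"
   function composed onto the matrix in MatCreate_MPIAIJ() below.
*/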
5548 
5549 EXTERN_C_BEGIN
5550 #if defined(PETSC_HAVE_MUMPS)
5551 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5552 #endif
5553 #if defined(PETSC_HAVE_PASTIX)
5554 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5555 #endif
5556 #if defined(PETSC_HAVE_SUPERLU_DIST)
5557 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5558 #endif
5559 #if defined(PETSC_HAVE_CLIQUE)
5560 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5561 #endif
5562 EXTERN_C_END
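/*
   Editorial usage sketch (not part of the original source): the factorization hooks declared above
   are reached through MatGetFactor(); isrow, iscol, b and x are hypothetical, previously created
   objects and the requested package must have been configured into PETSc.

     Mat           F;
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,isrow,iscol,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/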
5563 
5564 /*MC
5565    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5566 
5567    Options Database Keys:
5568 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5569 
5570   Level: beginner
5571 
5572 .seealso: MatCreateAIJ()
5573 M*/
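/*
   Editorial creation sketch (not part of the original source): the usual way to obtain a MATMPIAIJ
   matrix; comm, M, N and the preallocation counts are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
        ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/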
5574 
5575 EXTERN_C_BEGIN
5576 #undef __FUNCT__
5577 #define __FUNCT__ "MatCreate_MPIAIJ"
5578 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5579 {
5580   Mat_MPIAIJ     *b;
5581   PetscErrorCode ierr;
5582   PetscMPIInt    size;
5583 
5584   PetscFunctionBegin;
5585   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5586   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5587   B->data         = (void*)b;
5588   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5589   B->assembled    = PETSC_FALSE;
5590   B->insertmode   = NOT_SET_VALUES;
5591   b->size         = size;
5592   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5593 
5594   /* build cache for off array entries formed */
5595   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5596   b->donotstash  = PETSC_FALSE;
5597   b->colmap      = 0;
5598   b->garray      = 0;
5599   b->roworiented = PETSC_TRUE;
5600 
5601   /* stuff used for matrix vector multiply */
5602   b->lvec      = PETSC_NULL;
5603   b->Mvctx     = PETSC_NULL;
5604 
5605   /* stuff for MatGetRow() */
5606   b->rowindices   = 0;
5607   b->rowvalues    = 0;
5608   b->getrowactive = PETSC_FALSE;
5609 
5610   /* flexible pointer used in CUSP/CUSPARSE classes */
5611   b->spptr        = PETSC_NULL;
5612 
5613 #if defined(PETSC_HAVE_MUMPS)
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5615                                      "MatGetFactor_aij_mumps",
5616                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5617 #endif
5618 #if defined(PETSC_HAVE_PASTIX)
5619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5620 					   "MatGetFactor_mpiaij_pastix",
5621 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5622 #endif
5623 #if defined(PETSC_HAVE_SUPERLU_DIST)
5624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5625                                      "MatGetFactor_mpiaij_superlu_dist",
5626                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5627 #endif
5628 #if defined(PETSC_HAVE_CLIQUE)
5629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5630                                      "MatGetFactor_aij_clique",
5631                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5632 #endif
5633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5634                                      "MatStoreValues_MPIAIJ",
5635                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5637                                      "MatRetrieveValues_MPIAIJ",
5638                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5640 				     "MatGetDiagonalBlock_MPIAIJ",
5641                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5643 				     "MatIsTranspose_MPIAIJ",
5644 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5646 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5647 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5649 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5650 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5652 				     "MatDiagonalScaleLocal_MPIAIJ",
5653 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5655                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5656                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5658                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5659                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5661                                      "MatConvert_MPIAIJ_MPISBAIJ",
5662                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5664                                      "MatMatMult_MPIDense_MPIAIJ",
5665                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5667                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5668                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5670                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5671                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5673   PetscFunctionReturn(0);
5674 }
5675 EXTERN_C_END
5676 
5677 #undef __FUNCT__
5678 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5679 /*@
5680      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5681          and "off-diagonal" part of the matrix in CSR format.
5682 
5683    Collective on MPI_Comm
5684 
5685    Input Parameters:
5686 +  comm - MPI communicator
5687 .  m - number of local rows (Cannot be PETSC_DECIDE)
5688 .  n - This value should be the same as the local size used in creating the
5689        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5690        calculated if N is given). For square matrices n is almost always m.
5691 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5692 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5693 .   i - row indices for "diagonal" portion of matrix
5694 .   j - column indices
5695 .   a - matrix values
5696 .   oi - row indices for "off-diagonal" portion of matrix
5697 .   oj - column indices
5698 -   oa - matrix values
5699 
5700    Output Parameter:
5701 .   mat - the matrix
5702 
5703    Level: advanced
5704 
5705    Notes:
5706        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5707        must free the arrays once the matrix has been destroyed and not before.
5708 
5709        The i and j indices are 0 based
5710 
5711        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5712 
5713        This sets local rows and cannot be used to set off-processor values.
5714 
5715        You cannot later use MatSetValues() to change values in this matrix.
5716 
5717 .keywords: matrix, aij, compressed row, sparse, parallel
5718 
5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5720           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5721 @*/
5722 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5723 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5724 {
5725   PetscErrorCode ierr;
5726   Mat_MPIAIJ     *maij;
5727 
5728   PetscFunctionBegin;
5729   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5730   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5731   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5732   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5733   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5734   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5735   maij = (Mat_MPIAIJ*) (*mat)->data;
5736   maij->donotstash     = PETSC_TRUE;
5737   (*mat)->preallocated = PETSC_TRUE;
5738 
5739   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5740   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5741 
5742   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5743   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5744 
5745   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5748   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5749 
5750   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   PetscFunctionReturn(0);
5753 }
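/*
   Editorial usage sketch (not part of the original source): the six CSR arrays are provided by the
   caller and must outlive the matrix. Judging from the sequential blocks created above, j holds
   local column indices (0..n-1) of the "diagonal" block while oj holds global column indices of the
   "off-diagonal" block; the array names and comm are hypothetical.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
        ... use A; MatSetValues() may not be used to change its entries ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
        ... only now may the caller free i, j, a, oi, oj and oa ...
*/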
5754 
5755 /*
5756     Special version for direct calls from Fortran
5757 */
5758 #include <petsc-private/fortranimpl.h>
5759 
5760 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5761 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5762 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5763 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5764 #endif
5765 
5766 /* Change these macros so they can be used in a void function */
5767 #undef CHKERRQ
5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5769 #undef SETERRQ2
5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5771 #undef SETERRQ3
5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5773 #undef SETERRQ
5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5775 
5776 EXTERN_C_BEGIN
5777 #undef __FUNCT__
5778 #define __FUNCT__ "matsetvaluesmpiaij_"
5779 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5780 {
5781   Mat             mat = *mmat;
5782   PetscInt        m = *mm, n = *mn;
5783   InsertMode      addv = *maddv;
5784   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5785   PetscScalar     value;
5786   PetscErrorCode  ierr;
5787 
5788   MatCheckPreallocated(mat,1);
5789   if (mat->insertmode == NOT_SET_VALUES) {
5790     mat->insertmode = addv;
5791   }
5792 #if defined(PETSC_USE_DEBUG)
5793   else if (mat->insertmode != addv) {
5794     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5795   }
5796 #endif
5797   {
5798   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5799   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5800   PetscBool       roworiented = aij->roworiented;
5801 
5802   /* Some Variables required in the macro */
5803   Mat             A = aij->A;
5804   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5805   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5806   MatScalar       *aa = a->a;
5807   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5808   Mat             B = aij->B;
5809   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5810   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5811   MatScalar       *ba = b->a;
5812 
5813   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5814   PetscInt        nonew = a->nonew;
5815   MatScalar       *ap1,*ap2;
5816 
5817   PetscFunctionBegin;
5818   for (i=0; i<m; i++) {
5819     if (im[i] < 0) continue;
5820 #if defined(PETSC_USE_DEBUG)
5821     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5822 #endif
5823     if (im[i] >= rstart && im[i] < rend) {
5824       row      = im[i] - rstart;
5825       lastcol1 = -1;
5826       rp1      = aj + ai[row];
5827       ap1      = aa + ai[row];
5828       rmax1    = aimax[row];
5829       nrow1    = ailen[row];
5830       low1     = 0;
5831       high1    = nrow1;
5832       lastcol2 = -1;
5833       rp2      = bj + bi[row];
5834       ap2      = ba + bi[row];
5835       rmax2    = bimax[row];
5836       nrow2    = bilen[row];
5837       low2     = 0;
5838       high2    = nrow2;
5839 
5840       for (j=0; j<n; j++) {
5841         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5842         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5843         if (in[j] >= cstart && in[j] < cend){
5844           col = in[j] - cstart;
5845           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5846         } else if (in[j] < 0) continue;
5847 #if defined(PETSC_USE_DEBUG)
5848         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5849 #endif
5850         else {
5851           if (mat->was_assembled) {
5852             if (!aij->colmap) {
5853               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5854             }
5855 #if defined (PETSC_USE_CTABLE)
5856             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5857 	    col--;
5858 #else
5859             col = aij->colmap[in[j]] - 1;
5860 #endif
5861             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5862               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5863               col =  in[j];
5864               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5865               B = aij->B;
5866               b = (Mat_SeqAIJ*)B->data;
5867               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5868               rp2      = bj + bi[row];
5869               ap2      = ba + bi[row];
5870               rmax2    = bimax[row];
5871               nrow2    = bilen[row];
5872               low2     = 0;
5873               high2    = nrow2;
5874               bm       = aij->B->rmap->n;
5875               ba = b->a;
5876             }
5877           } else col = in[j];
5878           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5879         }
5880       }
5881     } else {
5882       if (!aij->donotstash) {
5883         if (roworiented) {
5884           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5885         } else {
5886           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5887         }
5888       }
5889     }
5890   }}
5891   PetscFunctionReturnVoid();
5892 }
5893 EXTERN_C_END
5894 
5895