xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision debe9ee2755b2e6ee26f815bab511dcc308e711d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
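/*
   A minimal usage sketch (not part of this source; the sizes and nonzero estimates are
   made up for illustration): create an AIJ matrix and, as recommended above, call both
   preallocation routines so the same code runs on one process or many.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);                used on a single process
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);   used on multiple processes
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/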
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
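/*
   A short sketch (illustrative only) of selecting this format at run time through the
   options database, as described above; the executable name is hypothetical.

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and then run with

      ./ex1 -mat_type aijcrl
*/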
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
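/*
   A hedged calling sketch for the routine below; gseq and Adist are hypothetical names.
   Rank 0 holds the square MATSEQAIJ matrix gseq and each process requests m local rows.

      Mat Adist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);

   and later, to move fresh numerical values from rank 0 into the already-created layout,

      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/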
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the number of off-diagonal-block entries in each row, and how many lie to the left of the diagonal block */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the number of off-diagonal-block entries in each row, and how many lie to the left of the diagonal block */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
330 a slightly higher hash table lookup cost; without it, it is not scalable (each processor
331 has an order N integer array) but access is fast.
332 */
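/*
   A sketch of how the map built here is consulted elsewhere in this file; gcol is a
   hypothetical global column index and lcol the resulting local off-diagonal column
   (-1 when gcol does not occur in the off-diagonal part).

      PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/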
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled; if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added into yy until the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
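/*
   A minimal usage sketch (illustrative): extract the diagonal of a square MPIAIJ matrix
   A into a conforming vector obtained from MatGetVecs().

      Vec d;
      ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
      ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
      ierr = VecDestroy(&d);CHKERRQ(ierr);
*/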
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1312     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) {
1318       rlen = PetscMax(rlen,range[i+1] - range[i]);
1319     }
1320   } else {
1321     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1322     rlen = mat->rmap->n;
1323   }
1324 
1325   /* load up the local row counts */
1326   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1327   for (i=0; i<mat->rmap->n; i++) {
1328     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329   }
1330 
1331   /* store the row lengths to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1337       rlen = range[i+1] - range[i];
1338       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1344     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1348 
1349   /* load up the local column indices */
1350   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1351   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1352   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if ( (col = garray[B->j[j]]) > cstart) break;
1357       column_indices[cnt++] = col;
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) {
1360       column_indices[cnt++] = A->j[k] + cstart;
1361     }
1362     for (; j<B->i[i+1]; j++) {
1363       column_indices[cnt++] = garray[B->j[j]];
1364     }
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column indices to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1388 
1389   /* load up the local column values */
1390   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1391   cnt  = 0;
1392   for (i=0; i<mat->rmap->n; i++) {
1393     for (j=B->i[i]; j<B->i[i+1]; j++) {
1394       if ( garray[B->j[j]] > cstart) break;
1395       column_values[cnt++] = B->a[j];
1396     }
1397     for (k=A->i[i]; k<A->i[i+1]; k++) {
1398       column_values[cnt++] = A->a[k];
1399     }
1400     for (; j<B->i[i+1]; j++) {
1401       column_values[cnt++] = B->a[j];
1402     }
1403   }
1404   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1405 
1406   /* store the column values to the file */
1407   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1408   if (!rank) {
1409     MPI_Status status;
1410     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     for (i=1; i<size; i++) {
1412       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1413       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1414       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1415       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1419   } else {
1420     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1421     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1424   }
1425   ierr = PetscFree(column_values);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
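/*
   Usage sketch (editorial note, hedged): this routine is normally reached through
   MatView() with a binary viewer rather than being called directly.  Assuming "A"
   is an assembled MATMPIAIJ matrix, a minimal example is

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   On more than one process MatView() dispatches to MatView_MPIAIJ_Binary(); the file
   can later be read back with MatLoad() and a FILE_MODE_READ binary viewer.
*/
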
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432 {
1433   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1434   PetscErrorCode    ierr;
1435   PetscMPIInt       rank = aij->rank,size = aij->size;
1436   PetscBool         isdraw,iascii,isbinary;
1437   PetscViewer       sviewer;
1438   PetscViewerFormat format;
1439 
1440   PetscFunctionBegin;
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1444   if (iascii) {
1445     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1446     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447       MatInfo    info;
1448       PetscBool  inodes;
1449 
1450       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1451       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1452       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1454       if (!inodes) {
1455         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1457       } else {
1458         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1460       }
1461       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1465       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1468       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471       PetscInt   inodecount,inodelimit,*inodes;
1472       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1473       if (inodes) {
1474         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1475       } else {
1476         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1477       }
1478       PetscFunctionReturn(0);
1479     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480       PetscFunctionReturn(0);
1481     }
1482   } else if (isbinary) {
1483     if (size == 1) {
1484       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1486     } else {
1487       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1488     }
1489     PetscFunctionReturn(0);
1490   } else if (isdraw) {
1491     PetscDraw  draw;
1492     PetscBool  isnull;
1493     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1494     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1495   }
1496 
1497   if (size == 1) {
1498     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500   } else {
1501     /* assemble the entire matrix onto first processor. */
1502     Mat         A;
1503     Mat_SeqAIJ  *Aloc;
1504     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505     MatScalar   *a;
1506 
1507     if (mat->rmap->N > 1024) {
1508       PetscBool  flg = PETSC_FALSE;
1509 
1510       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511       if (!flg) {
1512         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513       }
1514     }
1515 
1516     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517     if (!rank) {
1518       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519     } else {
1520       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521     }
1522     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527 
1528     /* copy over the A part */
1529     Aloc = (Mat_SeqAIJ*)aij->A->data;
1530     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531     row = mat->rmap->rstart;
1532     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533     for (i=0; i<m; i++) {
1534       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536     }
1537     aj = Aloc->j;
1538     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539 
1540     /* copy over the B part */
1541     Aloc = (Mat_SeqAIJ*)aij->B->data;
1542     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545     ct   = cols;
1546     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547     for (i=0; i<m; i++) {
1548       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550     }
1551     ierr = PetscFree(ct);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     /*
1555        Everyone has to participate in this call since the graphics waits are
1556        synchronized across all processes that share the PetscDraw object
1557     */
1558     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     if (!rank) {
1560       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564     }
1565     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     ierr = MatDestroy(&A);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575   PetscErrorCode ierr;
1576   PetscBool      iascii,isdraw,issocket,isbinary;
1577 
1578   PetscFunctionBegin;
1579   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1582   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1583   if (iascii || isdraw || isbinary || issocket) {
1584     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1585   } else {
1586     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
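/*
   Usage sketch (editorial note, hedged): the ASCII branch above honors the viewer
   format, so per-process nonzero statistics can be printed with something like

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   (The equivalent command-line option is -mat_view_info_detailed in this release,
   though the option name has varied between PETSc versions.)
*/
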
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSOR_MPIAIJ"
1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594 {
1595   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1596   PetscErrorCode ierr;
1597   Vec            bb1 = 0;
1598   PetscBool      hasop;
1599 
1600   PetscFunctionBegin;
1601   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1602     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1603   }
1604 
1605   if (flag == SOR_APPLY_UPPER) {
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607     PetscFunctionReturn(0);
1608   }
1609 
1610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611     if (flag & SOR_ZERO_INITIAL_GUESS) {
1612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613       its--;
1614     }
1615 
1616     while (its--) {
1617       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619 
1620       /* update rhs: bb1 = bb - B*x */
1621       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1622       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1623 
1624       /* local sweep */
1625       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1626     }
1627   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632     while (its--) {
1633       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635 
1636       /* update rhs: bb1 = bb - B*x */
1637       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1638       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1639 
1640       /* local sweep */
1641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1642     }
1643   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644     if (flag & SOR_ZERO_INITIAL_GUESS) {
1645       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1646       its--;
1647     }
1648     while (its--) {
1649       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651 
1652       /* update rhs: bb1 = bb - B*x */
1653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1655 
1656       /* local sweep */
1657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1658     }
1659   }  else if (flag & SOR_EISENSTAT) {
1660     Vec         xx1;
1661 
1662     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1664 
1665     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     if (!mat->diag) {
1668       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1669       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1670     }
1671     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1672     if (hasop) {
1673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1674     } else {
1675       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1676     }
1677     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1678 
1679     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1680 
1681     /* local sweep */
1682     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1684     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1685   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1686 
1687   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
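/*
   Editorial note (hedged): each parallel sweep above first forms the modified
   right-hand side bb1 = bb - B*x_ghost, where B is the off-diagonal block and
   x_ghost holds the scattered ghost values, and then performs a purely local sweep
   with the diagonal block A.  A user normally requests this behavior through the
   preconditioner interface rather than by calling MatSOR() directly, e.g.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   which on MPIAIJ matrices exercises the SOR_LOCAL_SYMMETRIC_SWEEP branch above.
*/
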
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatPermute_MPIAIJ"
1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694 {
1695   MPI_Comm       comm;
1696   PetscInt       first,local_rowsize,local_colsize;
1697   const PetscInt *rows;
1698   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1703   /* make a collective version of 'rowp'; this tolerates users who pass serial index sets */
1704   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1710   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is simpler since it need not be gathered;
1719      make a local version of 'colp' and invert it */
1720   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1721   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1722   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1724   /* now we just get the submatrix */
1725   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1726   /* clean up */
1727   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1728   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 
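/*
   Usage sketch (editorial note, hedged): MatPermute() tolerates serial index sets
   (see the ISOnComm() call above).  Assuming "m" and "n" are the local row and
   column sizes and "rstart"/"cstart" the ownership starts, the identity permutation
   can be applied as

     IS  rowp,colp;
     Mat Bperm;
     ierr = ISCreateStride(PETSC_COMM_WORLD,m,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,n,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Bperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
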
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size     = 1.0;
1743   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745   isend[3] = info->memory;  isend[4] = info->mallocs;
1746   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1747   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748   isend[3] += info->memory;  isend[4] += info->mallocs;
1749   if (flag == MAT_LOCAL) {
1750     info->nz_used      = isend[0];
1751     info->nz_allocated = isend[1];
1752     info->nz_unneeded  = isend[2];
1753     info->memory       = isend[3];
1754     info->mallocs      = isend[4];
1755   } else if (flag == MAT_GLOBAL_MAX) {
1756     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   } else if (flag == MAT_GLOBAL_SUM) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   }
1770   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1771   info->fill_ratio_needed = 0;
1772   info->factor_mallocs    = 0;
1773 
1774   PetscFunctionReturn(0);
1775 }
1776 
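/*
   Usage sketch (editorial note, hedged): global nonzero statistics can be queried
   through the routine above with

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %D, nz allocated %D\n",
                        (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL returns the sum of the diagonal (A) and off-diagonal (B) parts on this
   process only; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce over the communicator.
*/
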
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetOption_MPIAIJ"
1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1780 {
1781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   switch (op) {
1786   case MAT_NEW_NONZERO_LOCATIONS:
1787   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789   case MAT_KEEP_NONZERO_PATTERN:
1790   case MAT_NEW_NONZERO_LOCATION_ERR:
1791   case MAT_USE_INODES:
1792   case MAT_IGNORE_ZERO_ENTRIES:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_ROW_ORIENTED:
1797     a->roworiented = flg;
1798     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1799     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_NEW_DIAGONALS:
1802     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1803     break;
1804   case MAT_IGNORE_OFF_PROC_ENTRIES:
1805     a->donotstash = flg;
1806     break;
1807   case MAT_SPD:
1808     A->spd_set                         = PETSC_TRUE;
1809     A->spd                             = flg;
1810     if (flg) {
1811       A->symmetric                     = PETSC_TRUE;
1812       A->structurally_symmetric        = PETSC_TRUE;
1813       A->symmetric_set                 = PETSC_TRUE;
1814       A->structurally_symmetric_set    = PETSC_TRUE;
1815     }
1816     break;
1817   case MAT_SYMMETRIC:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_STRUCTURALLY_SYMMETRIC:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_HERMITIAN:
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_SYMMETRY_ETERNAL:
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     break;
1829 #if defined(PETSC_HAVE_TXPETSCGPU)
1830     /* MatOptions specific to the GPU subclasses are listed here so that they do
1831        not fall through to the unknown-option error below; they are handled by
1832        MatSetOption_MPIAIJCUSP/CUSPARSE. */
1833   case MAT_DIAGBLOCK_CSR:
1834   case MAT_OFFDIAGBLOCK_CSR:
1835   case MAT_CSR:
1836   case MAT_DIAGBLOCK_DIA:
1837   case MAT_OFFDIAGBLOCK_DIA:
1838   case MAT_DIA:
1839   case MAT_DIAGBLOCK_ELL:
1840   case MAT_OFFDIAGBLOCK_ELL:
1841   case MAT_ELL:
1842   case MAT_DIAGBLOCK_HYB:
1843   case MAT_OFFDIAGBLOCK_HYB:
1844   case MAT_HYB:
1845     /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */
1846     break;
1847 #endif
1848   default:
1849     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1850   }
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatGetRow_MPIAIJ"
1856 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857 {
1858   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1859   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1860   PetscErrorCode ierr;
1861   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1862   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1863   PetscInt       *cmap,*idx_p;
1864 
1865   PetscFunctionBegin;
1866   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1867   mat->getrowactive = PETSC_TRUE;
1868 
1869   if (!mat->rowvalues && (idx || v)) {
1870     /*
1871         allocate enough space to hold information from the longest row.
1872     */
1873     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1874     PetscInt   max = 1,tmp;
1875     for (i=0; i<matin->rmap->n; i++) {
1876       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1877       if (max < tmp) { max = tmp; }
1878     }
1879     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1880   }
1881 
1882   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1883   lrow = row - rstart;
1884 
1885   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1886   if (!v)   {pvA = 0; pvB = 0;}
1887   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1888   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1889   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1890   nztot = nzA + nzB;
1891 
1892   cmap  = mat->garray;
1893   if (v  || idx) {
1894     if (nztot) {
1895       /* Sort by increasing column numbers, assuming A and B already sorted */
1896       PetscInt imark = -1;
1897       if (v) {
1898         *v = v_p = mat->rowvalues;
1899         for (i=0; i<nzB; i++) {
1900           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1901           else break;
1902         }
1903         imark = i;
1904         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1905         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1906       }
1907       if (idx) {
1908         *idx = idx_p = mat->rowindices;
1909         if (imark > -1) {
1910           for (i=0; i<imark; i++) {
1911             idx_p[i] = cmap[cworkB[i]];
1912           }
1913         } else {
1914           for (i=0; i<nzB; i++) {
1915             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1916             else break;
1917           }
1918           imark = i;
1919         }
1920         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1921         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1922       }
1923     } else {
1924       if (idx) *idx = 0;
1925       if (v)   *v   = 0;
1926     }
1927   }
1928   *nz = nztot;
1929   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1930   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1937 {
1938   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1939 
1940   PetscFunctionBegin;
1941   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1942   aij->getrowactive = PETSC_FALSE;
1943   PetscFunctionReturn(0);
1944 }
1945 
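/*
   Usage sketch (editorial note, hedged): MatGetRow() on an MPIAIJ matrix only
   returns locally owned rows, and the values/indices returned above are a merged,
   column-sorted view of the diagonal (A) and off-diagonal (B) parts.  A typical
   loop looks like

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect cols[0..ncols-1] and vals[0..ncols-1] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
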
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatNorm_MPIAIJ"
1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1949 {
1950   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1951   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1952   PetscErrorCode ierr;
1953   PetscInt       i,j,cstart = mat->cmap->rstart;
1954   PetscReal      sum = 0.0;
1955   MatScalar      *v;
1956 
1957   PetscFunctionBegin;
1958   if (aij->size == 1) {
1959     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1960   } else {
1961     if (type == NORM_FROBENIUS) {
1962       v = amat->a;
1963       for (i=0; i<amat->nz; i++) {
1964 #if defined(PETSC_USE_COMPLEX)
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966 #else
1967         sum += (*v)*(*v); v++;
1968 #endif
1969       }
1970       v = bmat->a;
1971       for (i=0; i<bmat->nz; i++) {
1972 #if defined(PETSC_USE_COMPLEX)
1973         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1974 #else
1975         sum += (*v)*(*v); v++;
1976 #endif
1977       }
1978       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1979       *norm = PetscSqrtReal(*norm);
1980     } else if (type == NORM_1) { /* max column norm */
1981       PetscReal *tmp,*tmp2;
1982       PetscInt  *jj,*garray = aij->garray;
1983       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1984       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1985       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1986       *norm = 0.0;
1987       v = amat->a; jj = amat->j;
1988       for (j=0; j<amat->nz; j++) {
1989         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1990       }
1991       v = bmat->a; jj = bmat->j;
1992       for (j=0; j<bmat->nz; j++) {
1993         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1994       }
1995       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1996       for (j=0; j<mat->cmap->N; j++) {
1997         if (tmp2[j] > *norm) *norm = tmp2[j];
1998       }
1999       ierr = PetscFree(tmp);CHKERRQ(ierr);
2000       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2001     } else if (type == NORM_INFINITY) { /* max row norm */
2002       PetscReal ntemp = 0.0;
2003       for (j=0; j<aij->A->rmap->n; j++) {
2004         v = amat->a + amat->i[j];
2005         sum = 0.0;
2006         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         v = bmat->a + bmat->i[j];
2010         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2011           sum += PetscAbsScalar(*v); v++;
2012         }
2013         if (sum > ntemp) ntemp = sum;
2014       }
2015       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2016     } else {
2017       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2018     }
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
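/*
   Usage sketch (editorial note, hedged): in parallel only NORM_1, NORM_FROBENIUS and
   NORM_INFINITY are supported by the routine above (NORM_2 would require an
   eigenvalue computation), e.g.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/
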
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatTranspose_MPIAIJ"
2025 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2026 {
2027   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2028   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2029   PetscErrorCode ierr;
2030   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2031   PetscInt       cstart=A->cmap->rstart,ncol;
2032   Mat            B;
2033   MatScalar      *array;
2034 
2035   PetscFunctionBegin;
2036   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2037 
2038   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2039   ai = Aloc->i; aj = Aloc->j;
2040   bi = Bloc->i; bj = Bloc->j;
2041   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2042     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2043     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2044     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2045     for (i=0; i<ai[ma]; i++){
2046       d_nnz[aj[i]] ++;
2047       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2048     }
2049 
2050     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2051     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2052     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2053     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2054     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2055     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2056     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2057   } else {
2058     B = *matout;
2059   }
2060 
2061   /* copy over the A part */
2062   array = Aloc->a;
2063   row = A->rmap->rstart;
2064   for (i=0; i<ma; i++) {
2065     ncol = ai[i+1]-ai[i];
2066     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2067     row++; array += ncol; aj += ncol;
2068   }
2069   aj = Aloc->j;
2070   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2071 
2072   /* copy over the B part */
2073   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2074   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2075   array = Bloc->a;
2076   row = A->rmap->rstart;
2077   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2078   cols_tmp = cols;
2079   for (i=0; i<mb; i++) {
2080     ncol = bi[i+1]-bi[i];
2081     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2082     row++; array += ncol; cols_tmp += ncol;
2083   }
2084   ierr = PetscFree(cols);CHKERRQ(ierr);
2085 
2086   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2089     *matout = B;
2090   } else {
2091     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
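/*
   Usage sketch (editorial note, hedged): the preallocation above approximates the
   off-diagonal pattern by the diagonal one, so the transpose may allocate more space
   than strictly needed.  Out-of-place and in-place use:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   The in-place form requires A to be square, as checked at the top of the routine.
*/
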
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2099 {
2100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2101   Mat            a = aij->A,b = aij->B;
2102   PetscErrorCode ierr;
2103   PetscInt       s1,s2,s3;
2104 
2105   PetscFunctionBegin;
2106   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2107   if (rr) {
2108     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2109     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2110     /* Overlap communication with computation. */
2111     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112   }
2113   if (ll) {
2114     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2115     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2116     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2117   }
2118   /* scale the diagonal block */
2119   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2120 
2121   if (rr) {
2122     /* Do a scatter end and then right scale the off-diagonal block */
2123     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2124     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2125   }
2126 
2127   PetscFunctionReturn(0);
2128 }
2129 
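/*
   Usage sketch (editorial note, hedged): the routine above overlaps the scatter of
   the right scaling vector with scaling of the diagonal block.  To compute
   A <- diag(l) * A * diag(r):

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   Either vector may be passed as PETSC_NULL to scale only one side; l must match the
   local row layout and r the local column layout.
*/
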
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatEqual_MPIAIJ"
2144 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2145 {
2146   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2147   Mat            a,b,c,d;
2148   PetscBool      flg;
2149   PetscErrorCode ierr;
2150 
2151   PetscFunctionBegin;
2152   a = matA->A; b = matA->B;
2153   c = matB->A; d = matB->B;
2154 
2155   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2156   if (flg) {
2157     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2158   }
2159   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatCopy_MPIAIJ"
2165 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2166 {
2167   PetscErrorCode ierr;
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2169   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2170 
2171   PetscFunctionBegin;
2172   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2173   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2174     /* because of the column compression in the off-processor part of the matrix a->B,
2175        the number of columns in a->B and b->B may be different, hence we cannot call
2176        the MatCopy() directly on the two parts. If need be, we can provide a more
2177        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2178        then copying the submatrices */
2179     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2180   } else {
2181     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2182     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2183   }
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatSetUp_MPIAIJ"
2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2190 {
2191   PetscErrorCode ierr;
2192 
2193   PetscFunctionBegin;
2194   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2200 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2201 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2202 {
2203   PetscInt          i,m=Y->rmap->N;
2204   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2205   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2206   const PetscInt    *xi = x->i,*yi = y->i;
2207 
2208   PetscFunctionBegin;
2209   /* Set the number of nonzeros in the new matrix */
2210   for(i=0; i<m; i++) {
2211     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2212     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2213     nnz[i] = 0;
2214     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2215       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2216       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2217       nnz[i]++;
2218     }
2219     for (; k<nzy; k++) nnz[i]++;
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 
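/*
   Editorial note (hedged): the loop above merges the two sorted global column lists
   of each row and counts their union.  For example, if row i of Y has global columns
   {1,4,7} and row i of X has {4,5}, then nnz[i] = 4, corresponding to {1,4,5,7}.
*/
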
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatAXPY_MPIAIJ"
2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2227 {
2228   PetscErrorCode ierr;
2229   PetscInt       i;
2230   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2231   PetscBLASInt   bnz,one=1;
2232   Mat_SeqAIJ     *x,*y;
2233 
2234   PetscFunctionBegin;
2235   if (str == SAME_NONZERO_PATTERN) {
2236     PetscScalar alpha = a;
2237     x = (Mat_SeqAIJ *)xx->A->data;
2238     y = (Mat_SeqAIJ *)yy->A->data;
2239     bnz = PetscBLASIntCast(x->nz);
2240     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2241     x = (Mat_SeqAIJ *)xx->B->data;
2242     y = (Mat_SeqAIJ *)yy->B->data;
2243     bnz = PetscBLASIntCast(x->nz);
2244     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2245   } else if (str == SUBSET_NONZERO_PATTERN) {
2246     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2247 
2248     x = (Mat_SeqAIJ *)xx->B->data;
2249     y = (Mat_SeqAIJ *)yy->B->data;
2250     if (y->xtoy && y->XtoY != xx->B) {
2251       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2252       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2253     }
2254     if (!y->xtoy) { /* get xtoy */
2255       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2256       y->XtoY = xx->B;
2257       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2258     }
2259     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2260   } else {
2261     Mat B;
2262     PetscInt *nnz_d,*nnz_o;
2263     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2264     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2265     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2266     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2267     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2268     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2269     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2270     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2271     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2272     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2273     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2274     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2275     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2276     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2277   }
2278   PetscFunctionReturn(0);
2279 }
2280 
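/*
   Usage sketch (editorial note, hedged): the MatStructure flag selects among the
   branches above.  To compute Y <- Y + 2*X one may call

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN uses a direct BLAS axpy on the stored values,
   SUBSET_NONZERO_PATTERN reuses a cached X-to-Y index map, and
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation as in the final
   branch above.
*/
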
2281 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatConjugate_MPIAIJ"
2285 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2286 {
2287 #if defined(PETSC_USE_COMPLEX)
2288   PetscErrorCode ierr;
2289   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2293   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2294 #else
2295   PetscFunctionBegin;
2296 #endif
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatRealPart_MPIAIJ"
2302 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2303 {
2304   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2309   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2315 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2316 {
2317   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2322   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #ifdef PETSC_HAVE_PBGL
2327 
2328 #include <boost/parallel/mpi/bsp_process_group.hpp>
2329 #include <boost/graph/distributed/ilu_default_graph.hpp>
2330 #include <boost/graph/distributed/ilu_0_block.hpp>
2331 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2332 #include <boost/graph/distributed/petsc/interface.hpp>
2333 #include <boost/multi_array.hpp>
2334 #include <boost/parallel/distributed_property_map.hpp>
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2338 /*
2339   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2340 */
2341 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2342 {
2343   namespace petsc = boost::distributed::petsc;
2344 
2345   namespace graph_dist = boost::graph::distributed;
2346   using boost::graph::distributed::ilu_default::process_group_type;
2347   using boost::graph::ilu_permuted;
2348 
2349   PetscBool       row_identity, col_identity;
2350   PetscContainer  c;
2351   PetscInt        m, n, M, N;
2352   PetscErrorCode  ierr;
2353 
2354   PetscFunctionBegin;
2355   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2356   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2357   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2358   if (!row_identity || !col_identity) {
2359     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2360   }
2361 
2362   process_group_type pg;
2363   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2364   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2365   lgraph_type&   level_graph = *lgraph_p;
2366   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2367 
2368   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2369   ilu_permuted(level_graph);
2370 
2371   /* put together the new matrix */
2372   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2373   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2374   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2375   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2376   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2377   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2378   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2380 
2381   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2382   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2383   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2384   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2390 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2391 {
2392   PetscFunctionBegin;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatSolve_MPIAIJ"
2398 /*
2399   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2400 */
2401 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2402 {
2403   namespace graph_dist = boost::graph::distributed;
2404 
2405   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2406   lgraph_type*   lgraph_p;
2407   PetscContainer c;
2408   PetscErrorCode ierr;
2409 
2410   PetscFunctionBegin;
2411   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2412   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2413   ierr = VecCopy(b, x);CHKERRQ(ierr);
2414 
2415   PetscScalar* array_x;
2416   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2417   PetscInt sx;
2418   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2419 
2420   PetscScalar* array_b;
2421   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2422   PetscInt sb;
2423   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2424 
2425   lgraph_type&   level_graph = *lgraph_p;
2426   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2427 
2428   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2429   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2430                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2431 
2432   typedef boost::iterator_property_map<array_ref_type::iterator,
2433                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2434   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2435                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2436 
2437   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2438 
2439   PetscFunctionReturn(0);
2440 }
2441 #endif
2442 
2443 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2444   PetscInt       nzlocal,nsends,nrecvs;
2445   PetscMPIInt    *send_rank,*recv_rank;
2446   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2447   PetscScalar    *sbuf_a,**rbuf_a;
2448   PetscErrorCode (*Destroy)(Mat);
2449 } Mat_Redundant;
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2453 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2454 {
2455   PetscErrorCode       ierr;
2456   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2457   PetscInt             i;
2458 
2459   PetscFunctionBegin;
2460   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2461   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2462   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2463   for (i=0; i<redund->nrecvs; i++){
2464     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2465     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2466   }
2467   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2468   ierr = PetscFree(redund);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatDestroy_MatRedundant"
2474 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2475 {
2476   PetscErrorCode  ierr;
2477   PetscContainer  container;
2478   Mat_Redundant   *redund=PETSC_NULL;
2479 
2480   PetscFunctionBegin;
2481   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2482   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2483   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2484   A->ops->destroy = redund->Destroy;
2485   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2486   if (A->ops->destroy) {
2487     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2488   }
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2494 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2495 {
2496   PetscMPIInt    rank,size;
2497   MPI_Comm       comm=((PetscObject)mat)->comm;
2498   PetscErrorCode ierr;
2499   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2500   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2501   PetscInt       *rowrange=mat->rmap->range;
2502   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2503   Mat            A=aij->A,B=aij->B,C=*matredundant;
2504   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2505   PetscScalar    *sbuf_a;
2506   PetscInt       nzlocal=a->nz+b->nz;
2507   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2508   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2509   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2510   MatScalar      *aworkA,*aworkB;
2511   PetscScalar    *vals;
2512   PetscMPIInt    tag1,tag2,tag3,imdex;
2513   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2514                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2515   MPI_Status     recv_status,*send_status;
2516   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2517   PetscInt       **rbuf_j=PETSC_NULL;
2518   PetscScalar    **rbuf_a=PETSC_NULL;
2519   Mat_Redundant  *redund=PETSC_NULL;
2520   PetscContainer container;
2521 
2522   PetscFunctionBegin;
2523   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2525 
2526   if (reuse == MAT_REUSE_MATRIX) {
2527     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2528     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2529     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2530     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2531     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2532     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2533     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2534     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2535 
2536     nsends    = redund->nsends;
2537     nrecvs    = redund->nrecvs;
2538     send_rank = redund->send_rank;
2539     recv_rank = redund->recv_rank;
2540     sbuf_nz   = redund->sbuf_nz;
2541     rbuf_nz   = redund->rbuf_nz;
2542     sbuf_j    = redund->sbuf_j;
2543     sbuf_a    = redund->sbuf_a;
2544     rbuf_j    = redund->rbuf_j;
2545     rbuf_a    = redund->rbuf_a;
2546   }
2547 
2548   if (reuse == MAT_INITIAL_MATRIX){
2549     PetscMPIInt  subrank,subsize;
2550     PetscInt     nleftover,np_subcomm;
2551     /* get the destination processors' id send_rank, nsends and nrecvs */
2552     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2553     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2554     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2555     np_subcomm = size/nsubcomm;
2556     nleftover  = size - nsubcomm*np_subcomm;
2557     nsends = 0; nrecvs = 0;
2558     for (i=0; i<size; i++){ /* i=rank*/
2559       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2560         send_rank[nsends] = i; nsends++;
2561         recv_rank[nrecvs++] = i;
2562       }
2563     }
2564     if (rank >= size - nleftover){/* this proc is a leftover processor */
2565       i = size-nleftover-1;
2566       j = 0;
2567       while (j < nsubcomm - nleftover){
2568         send_rank[nsends++] = i;
2569         i--; j++;
2570       }
2571     }
2572 
2573     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2574       for (i=0; i<nleftover; i++){
2575         recv_rank[nrecvs++] = size-nleftover+i;
2576       }
2577     }
2578 
2579     /* allocate sbuf_j, sbuf_a */
2580     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2581     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2582     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2583   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2584 
2585   /* copy mat's local entries into the buffers */
2586   if (reuse == MAT_INITIAL_MATRIX){
2587     rownz_max = 0;
2588     rptr = sbuf_j;
2589     cols = sbuf_j + rend-rstart + 1;
2590     vals = sbuf_a;
2591     rptr[0] = 0;
2592     for (i=0; i<rend-rstart; i++){
2593       row = i + rstart;
2594       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2595       ncols  = nzA + nzB;
2596       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2597       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2598       /* load the column indices for this row into cols */
2599       lwrite = 0;
2600       for (l=0; l<nzB; l++) {
2601         if ((ctmp = bmap[cworkB[l]]) < cstart){
2602           vals[lwrite]   = aworkB[l];
2603           cols[lwrite++] = ctmp;
2604         }
2605       }
2606       for (l=0; l<nzA; l++){
2607         vals[lwrite]   = aworkA[l];
2608         cols[lwrite++] = cstart + cworkA[l];
2609       }
2610       for (l=0; l<nzB; l++) {
2611         if ((ctmp = bmap[cworkB[l]]) >= cend){
2612           vals[lwrite]   = aworkB[l];
2613           cols[lwrite++] = ctmp;
2614         }
2615       }
2616       vals += ncols;
2617       cols += ncols;
2618       rptr[i+1] = rptr[i] + ncols;
2619       if (rownz_max < ncols) rownz_max = ncols;
2620     }
2621     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2622   } else { /* only copy matrix values into sbuf_a */
2623     rptr = sbuf_j;
2624     vals = sbuf_a;
2625     rptr[0] = 0;
2626     for (i=0; i<rend-rstart; i++){
2627       row = i + rstart;
2628       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2629       ncols  = nzA + nzB;
2630       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2631       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2632       lwrite = 0;
2633       for (l=0; l<nzB; l++) {
2634         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2635       }
2636       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2637       for (l=0; l<nzB; l++) {
2638         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2639       }
2640       vals += ncols;
2641       rptr[i+1] = rptr[i] + ncols;
2642     }
2643   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2644 
2645   /* send nzlocal to others, and recv other's nzlocal */
2646   /*--------------------------------------------------*/
2647   if (reuse == MAT_INITIAL_MATRIX){
2648     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2649     s_waits2 = s_waits3 + nsends;
2650     s_waits1 = s_waits2 + nsends;
2651     r_waits1 = s_waits1 + nsends;
2652     r_waits2 = r_waits1 + nrecvs;
2653     r_waits3 = r_waits2 + nrecvs;
2654   } else {
2655     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2656     r_waits3 = s_waits3 + nsends;
2657   }
2658 
2659   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2660   if (reuse == MAT_INITIAL_MATRIX){
2661     /* get new tags to keep the communication clean */
2662     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2663     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2664     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2665 
2666     /* post receives of other's nzlocal */
2667     for (i=0; i<nrecvs; i++){
2668       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2669     }
2670     /* send nzlocal to others */
2671     for (i=0; i<nsends; i++){
2672       sbuf_nz[i] = nzlocal;
2673       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2674     }
2675     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2676     count = nrecvs;
2677     while (count) {
2678       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2679       recv_rank[imdex] = recv_status.MPI_SOURCE;
2680       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2681       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2682 
2683       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2684       rbuf_nz[imdex] += i + 2;
2685       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2686       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2687       count--;
2688     }
2689     /* wait on sends of nzlocal */
2690     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2691     /* send mat->i,j to others, and recv from other's */
2692     /*------------------------------------------------*/
2693     for (i=0; i<nsends; i++){
2694       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2695       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2696     }
2697     /* wait on receives of mat->i,j */
2698     /*------------------------------*/
2699     count = nrecvs;
2700     while (count) {
2701       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2702       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2703       count--;
2704     }
2705     /* wait on sends of mat->i,j */
2706     /*---------------------------*/
2707     if (nsends) {
2708       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2709     }
2710   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2711 
2712   /* post receives, send and receive mat->a */
2713   /*----------------------------------------*/
2714   for (imdex=0; imdex<nrecvs; imdex++) {
2715     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2716   }
2717   for (i=0; i<nsends; i++){
2718     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2719   }
2720   count = nrecvs;
2721   while (count) {
2722     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2723     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2724     count--;
2725   }
2726   if (nsends) {
2727     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2728   }
2729 
2730   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2731 
2732   /* create redundant matrix */
2733   /*-------------------------*/
2734   if (reuse == MAT_INITIAL_MATRIX){
2735     /* compute rownz_max for preallocation */
2736     for (imdex=0; imdex<nrecvs; imdex++){
2737       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2738       rptr = rbuf_j[imdex];
2739       for (i=0; i<j; i++){
2740         ncols = rptr[i+1] - rptr[i];
2741         if (rownz_max < ncols) rownz_max = ncols;
2742       }
2743     }
2744 
2745     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2746     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2747     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2748     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2749     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2750     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2751   } else {
2752     C = *matredundant;
2753   }
2754 
2755   /* insert local matrix entries */
2756   rptr = sbuf_j;
2757   cols = sbuf_j + rend-rstart + 1;
2758   vals = sbuf_a;
2759   for (i=0; i<rend-rstart; i++){
2760     row   = i + rstart;
2761     ncols = rptr[i+1] - rptr[i];
2762     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2763     vals += ncols;
2764     cols += ncols;
2765   }
2766   /* insert received matrix entries */
2767   for (imdex=0; imdex<nrecvs; imdex++){
2768     rstart = rowrange[recv_rank[imdex]];
2769     rend   = rowrange[recv_rank[imdex]+1];
2770     rptr = rbuf_j[imdex];
2771     cols = rbuf_j[imdex] + rend-rstart + 1;
2772     vals = rbuf_a[imdex];
2773     for (i=0; i<rend-rstart; i++){
2774       row   = i + rstart;
2775       ncols = rptr[i+1] - rptr[i];
2776       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2777       vals += ncols;
2778       cols += ncols;
2779     }
2780   }
2781   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2784   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2785   if (reuse == MAT_INITIAL_MATRIX) {
2786     PetscContainer container;
2787     *matredundant = C;
2788     /* create a supporting struct and attach it to C for reuse */
2789     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2790     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2791     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2792     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2793     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2794     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2795 
2796     redund->nzlocal = nzlocal;
2797     redund->nsends  = nsends;
2798     redund->nrecvs  = nrecvs;
2799     redund->send_rank = send_rank;
2800     redund->recv_rank = recv_rank;
2801     redund->sbuf_nz = sbuf_nz;
2802     redund->rbuf_nz = rbuf_nz;
2803     redund->sbuf_j  = sbuf_j;
2804     redund->sbuf_a  = sbuf_a;
2805     redund->rbuf_j  = rbuf_j;
2806     redund->rbuf_a  = rbuf_a;
2807 
2808     redund->Destroy = C->ops->destroy;
2809     C->ops->destroy = MatDestroy_MatRedundant;
2810   }
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2816 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2817 {
2818   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2819   PetscErrorCode ierr;
2820   PetscInt       i,*idxb = 0;
2821   PetscScalar    *va,*vb;
2822   Vec            vtmp;
2823 
2824   PetscFunctionBegin;
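  /* Compute the row-wise max |value| of the diagonal block a->A and of the off-diagonal block a->B
     separately, then merge the two below; off-diagonal column indices are mapped back to global
     numbering through a->garray */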
2825   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2826   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2827   if (idx) {
2828     for (i=0; i<A->rmap->n; i++) {
2829       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2830     }
2831   }
2832 
2833   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2834   if (idx) {
2835     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2836   }
2837   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2838   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2839 
2840   for (i=0; i<A->rmap->n; i++){
2841     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2842       va[i] = vb[i];
2843       if (idx) idx[i] = a->garray[idxb[i]];
2844     }
2845   }
2846 
2847   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2848   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2849   ierr = PetscFree(idxb);CHKERRQ(ierr);
2850   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2856 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2857 {
2858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2859   PetscErrorCode ierr;
2860   PetscInt       i,*idxb = 0;
2861   PetscScalar    *va,*vb;
2862   Vec            vtmp;
2863 
2864   PetscFunctionBegin;
2865   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2866   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2867   if (idx) {
2868     for (i=0; i<A->rmap->n; i++) {
2869       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2870     }
2871   }
2872 
2873   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2874   if (idx) {
2875     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2876   }
2877   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2878   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2879 
2880   for (i=0; i<A->rmap->n; i++){
2881     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2882       va[i] = vb[i];
2883       if (idx) idx[i] = a->garray[idxb[i]];
2884     }
2885   }
2886 
2887   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2888   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2889   ierr = PetscFree(idxb);CHKERRQ(ierr);
2890   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2896 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2899   PetscInt       n      = A->rmap->n;
2900   PetscInt       cstart = A->cmap->rstart;
2901   PetscInt      *cmap   = mat->garray;
2902   PetscInt      *diagIdx, *offdiagIdx;
2903   Vec            diagV, offdiagV;
2904   PetscScalar   *a, *diagA, *offdiagA;
2905   PetscInt       r;
2906   PetscErrorCode ierr;
2907 
2908   PetscFunctionBegin;
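  /* Row minima of the diagonal block mat->A and the off-diagonal block mat->B are computed into two
     sequential work vectors and then combined row by row; unlike the Abs variants above, idx is
     dereferenced unconditionally here, so the caller must pass a non-NULL idx array */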
2909   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2911   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2912   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2913   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2914   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2915   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2916   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917   for(r = 0; r < n; ++r) {
2918     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2919       a[r]   = diagA[r];
2920       idx[r] = cstart + diagIdx[r];
2921     } else {
2922       a[r]   = offdiagA[r];
2923       idx[r] = cmap[offdiagIdx[r]];
2924     }
2925   }
2926   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2930   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2931   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2937 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2938 {
2939   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2940   PetscInt       n      = A->rmap->n;
2941   PetscInt       cstart = A->cmap->rstart;
2942   PetscInt      *cmap   = mat->garray;
2943   PetscInt      *diagIdx, *offdiagIdx;
2944   Vec            diagV, offdiagV;
2945   PetscScalar   *a, *diagA, *offdiagA;
2946   PetscInt       r;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2951   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2952   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2953   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2954   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2955   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2956   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2957   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2958   for(r = 0; r < n; ++r) {
2959     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2960       a[r]   = diagA[r];
2961       idx[r] = cstart + diagIdx[r];
2962     } else {
2963       a[r]   = offdiagA[r];
2964       idx[r] = cmap[offdiagIdx[r]];
2965     }
2966   }
2967   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2968   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2969   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2970   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2971   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2972   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2978 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2979 {
2980   PetscErrorCode ierr;
2981   Mat            *dummy;
2982 
2983   PetscFunctionBegin;
2984   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2985   *newmat = *dummy;
2986   ierr = PetscFree(dummy);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2991 
2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2994 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2995 {
2996   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2997   PetscErrorCode ierr;
2998 
2999   PetscFunctionBegin;
3000   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 
3005 /* -------------------------------------------------------------------*/
3006 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3007        MatGetRow_MPIAIJ,
3008        MatRestoreRow_MPIAIJ,
3009        MatMult_MPIAIJ,
3010 /* 4*/ MatMultAdd_MPIAIJ,
3011        MatMultTranspose_MPIAIJ,
3012        MatMultTransposeAdd_MPIAIJ,
3013 #ifdef PETSC_HAVE_PBGL
3014        MatSolve_MPIAIJ,
3015 #else
3016        0,
3017 #endif
3018        0,
3019        0,
3020 /*10*/ 0,
3021        0,
3022        0,
3023        MatSOR_MPIAIJ,
3024        MatTranspose_MPIAIJ,
3025 /*15*/ MatGetInfo_MPIAIJ,
3026        MatEqual_MPIAIJ,
3027        MatGetDiagonal_MPIAIJ,
3028        MatDiagonalScale_MPIAIJ,
3029        MatNorm_MPIAIJ,
3030 /*20*/ MatAssemblyBegin_MPIAIJ,
3031        MatAssemblyEnd_MPIAIJ,
3032        MatSetOption_MPIAIJ,
3033        MatZeroEntries_MPIAIJ,
3034 /*24*/ MatZeroRows_MPIAIJ,
3035        0,
3036 #ifdef PETSC_HAVE_PBGL
3037        0,
3038 #else
3039        0,
3040 #endif
3041        0,
3042        0,
3043 /*29*/ MatSetUp_MPIAIJ,
3044 #ifdef PETSC_HAVE_PBGL
3045        0,
3046 #else
3047        0,
3048 #endif
3049        0,
3050        0,
3051        0,
3052 /*34*/ MatDuplicate_MPIAIJ,
3053        0,
3054        0,
3055        0,
3056        0,
3057 /*39*/ MatAXPY_MPIAIJ,
3058        MatGetSubMatrices_MPIAIJ,
3059        MatIncreaseOverlap_MPIAIJ,
3060        MatGetValues_MPIAIJ,
3061        MatCopy_MPIAIJ,
3062 /*44*/ MatGetRowMax_MPIAIJ,
3063        MatScale_MPIAIJ,
3064        0,
3065        0,
3066        MatZeroRowsColumns_MPIAIJ,
3067 /*49*/ 0,
3068        0,
3069        0,
3070        0,
3071        0,
3072 /*54*/ MatFDColoringCreate_MPIAIJ,
3073        0,
3074        MatSetUnfactored_MPIAIJ,
3075        MatPermute_MPIAIJ,
3076        0,
3077 /*59*/ MatGetSubMatrix_MPIAIJ,
3078        MatDestroy_MPIAIJ,
3079        MatView_MPIAIJ,
3080        0,
3081        0,
3082 /*64*/ 0,
3083        0,
3084        0,
3085        0,
3086        0,
3087 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3088        MatGetRowMinAbs_MPIAIJ,
3089        0,
3090        MatSetColoring_MPIAIJ,
3091 #if defined(PETSC_HAVE_ADIC)
3092        MatSetValuesAdic_MPIAIJ,
3093 #else
3094        0,
3095 #endif
3096        MatSetValuesAdifor_MPIAIJ,
3097 /*75*/ MatFDColoringApply_AIJ,
3098        0,
3099        0,
3100        0,
3101        0,
3102 /*80*/ 0,
3103        0,
3104        0,
3105 /*83*/ MatLoad_MPIAIJ,
3106        0,
3107        0,
3108        0,
3109        0,
3110        0,
3111 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3112        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3113        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3114        MatPtAP_Basic,
3115        MatPtAPSymbolic_MPIAIJ,
3116 /*94*/ MatPtAPNumeric_MPIAIJ,
3117        0,
3118        0,
3119        0,
3120        0,
3121 /*99*/ 0,
3122        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3123        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3124        MatConjugate_MPIAIJ,
3125        0,
3126 /*104*/MatSetValuesRow_MPIAIJ,
3127        MatRealPart_MPIAIJ,
3128        MatImaginaryPart_MPIAIJ,
3129        0,
3130        0,
3131 /*109*/0,
3132        MatGetRedundantMatrix_MPIAIJ,
3133        MatGetRowMin_MPIAIJ,
3134        0,
3135        0,
3136 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3137        0,
3138        0,
3139        0,
3140        0,
3141 /*119*/0,
3142        0,
3143        0,
3144        0,
3145        MatGetMultiProcBlock_MPIAIJ,
3146 /*124*/MatFindNonZeroRows_MPIAIJ,
3147        MatGetColumnNorms_MPIAIJ,
3148        MatInvertBlockDiagonal_MPIAIJ,
3149        0,
3150        MatGetSubMatricesParallel_MPIAIJ,
3151 /*129*/0,
3152        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3153        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3154        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3155        0,
3156 /*134*/0,
3157        0,
3158        0,
3159        0,
3160        0
3161 };
3162 
3163 /* ----------------------------------------------------------------------------------------*/
3164 
3165 EXTERN_C_BEGIN
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3168 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3169 {
3170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3175   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 EXTERN_C_END
3179 
3180 EXTERN_C_BEGIN
3181 #undef __FUNCT__
3182 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3183 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3184 {
3185   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3186   PetscErrorCode ierr;
3187 
3188   PetscFunctionBegin;
3189   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3190   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3191   PetscFunctionReturn(0);
3192 }
3193 EXTERN_C_END
3194 
3195 EXTERN_C_BEGIN
3196 #undef __FUNCT__
3197 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3198 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3199 {
3200   Mat_MPIAIJ     *b;
3201   PetscErrorCode ierr;
3202   PetscInt       i;
3203   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3204 
3205   PetscFunctionBegin;
3206   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3207   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3208   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3209   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3210   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3211   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3212 
3213   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3214   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3215   if (d_nnz) {
3216     for (i=0; i<B->rmap->n; i++) {
3217       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3218     }
3219   }
3220   if (o_nnz) {
3221     for (i=0; i<B->rmap->n; i++) {
3222       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3223     }
3224   }
3225   b = (Mat_MPIAIJ*)B->data;
3226 
3227   if (!B->preallocated) {
3228     /* Explicitly create 2 MATSEQAIJ matrices. */
3229     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3230     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3231     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3232     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3233     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3234     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3235     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3236     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3237     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3238     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3239   }
3240 
3241   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3242   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3243   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3244   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3245   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3246   B->preallocated = PETSC_TRUE;
3247   PetscFunctionReturn(0);
3248 }
3249 EXTERN_C_END
3250 
3251 #undef __FUNCT__
3252 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3253 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3254 {
3255   Mat            mat;
3256   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3257   PetscErrorCode ierr;
3258 
3259   PetscFunctionBegin;
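  /* Create the new parallel matrix shell first, copy the scalar flags and the parallel support
     structures (column map, garray, local work vector, scatter context), and finally duplicate the
     two sequential blocks A and B with the requested copy-values option */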
3260   *newmat       = 0;
3261   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3262   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3263   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3264   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3265   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3266   a    = (Mat_MPIAIJ*)mat->data;
3267 
3268   mat->factortype    = matin->factortype;
3269   mat->rmap->bs      = matin->rmap->bs;
3270   mat->cmap->bs      = matin->cmap->bs;
3271   mat->assembled    = PETSC_TRUE;
3272   mat->insertmode   = NOT_SET_VALUES;
3273   mat->preallocated = PETSC_TRUE;
3274 
3275   a->size           = oldmat->size;
3276   a->rank           = oldmat->rank;
3277   a->donotstash     = oldmat->donotstash;
3278   a->roworiented    = oldmat->roworiented;
3279   a->rowindices     = 0;
3280   a->rowvalues      = 0;
3281   a->getrowactive   = PETSC_FALSE;
3282 
3283   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3284   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3285 
3286   if (oldmat->colmap) {
3287 #if defined (PETSC_USE_CTABLE)
3288     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3289 #else
3290     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3291     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3292     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3293 #endif
3294   } else a->colmap = 0;
3295   if (oldmat->garray) {
3296     PetscInt len;
3297     len  = oldmat->B->cmap->n;
3298     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3299     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3300     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3301   } else a->garray = 0;
3302 
3303   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3304   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3305   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3306   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3307   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3308   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3309   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3310   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3311   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3312   *newmat = mat;
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 
3317 
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatLoad_MPIAIJ"
3320 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3321 {
3322   PetscScalar    *vals,*svals;
3323   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3324   PetscErrorCode ierr;
3325   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3326   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3327   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3328   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3329   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3330   int            fd;
3331 
3332   PetscFunctionBegin;
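  /* Only rank 0 reads the binary file: it reads the header, per-row lengths, column indices and
     values, keeps its own slice, and ships every other process its slice with MPIULong_Send();
     each process then preallocates from its diagonal/off-diagonal row lengths and inserts its rows */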
3333   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3334   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3335   if (!rank) {
3336     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3337     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3338     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3339   }
3340 
3341   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3342 
3343   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3344   M = header[1]; N = header[2];
3345   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3346   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3347   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3348 
3349   /* If global sizes are set, check if they are consistent with that given in the file */
3350   if (sizesset) {
3351     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3352   }
3353   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3354   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3355 
3356   /* determine ownership of all rows */
3357   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3358   else m = newMat->rmap->n; /* Set by user */
3359 
3360   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3361   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3362 
3363   /* First process needs enough room for process with most rows */
3364   if (!rank) {
3365     mmax       = rowners[1];
3366     for (i=2; i<size; i++) {
3367       mmax = PetscMax(mmax,rowners[i]);
3368     }
3369   } else mmax = m;
3370 
3371   rowners[0] = 0;
3372   for (i=2; i<=size; i++) {
3373     rowners[i] += rowners[i-1];
3374   }
3375   rstart = rowners[rank];
3376   rend   = rowners[rank+1];
3377 
3378   /* distribute row lengths to all processors */
3379   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3380   if (!rank) {
3381     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3382     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3383     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3384     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3385     for (j=0; j<m; j++) {
3386       procsnz[0] += ourlens[j];
3387     }
3388     for (i=1; i<size; i++) {
3389       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3390       /* calculate the number of nonzeros on each processor */
3391       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3392         procsnz[i] += rowlengths[j];
3393       }
3394       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3395     }
3396     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3397   } else {
3398     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3399   }
3400 
3401   if (!rank) {
3402     /* determine max buffer needed and allocate it */
3403     maxnz = 0;
3404     for (i=0; i<size; i++) {
3405       maxnz = PetscMax(maxnz,procsnz[i]);
3406     }
3407     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3408 
3409     /* read in my part of the matrix column indices  */
3410     nz   = procsnz[0];
3411     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3412     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3413 
3414     /* read in everyone else's and ship it off */
3415     for (i=1; i<size; i++) {
3416       nz     = procsnz[i];
3417       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3418       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3419     }
3420     ierr = PetscFree(cols);CHKERRQ(ierr);
3421   } else {
3422     /* determine buffer space needed for message */
3423     nz = 0;
3424     for (i=0; i<m; i++) {
3425       nz += ourlens[i];
3426     }
3427     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3428 
3429     /* receive message of column indices*/
3430     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3431   }
3432 
3433   /* determine column ownership if matrix is not square */
3434   if (N != M) {
3435     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3436     else n = newMat->cmap->n;
3437     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3438     cstart = cend - n;
3439   } else {
3440     cstart = rstart;
3441     cend   = rend;
3442     n      = cend - cstart;
3443   }
3444 
3445   /* loop over local rows, determining number of off diagonal entries */
3446   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3447   jj = 0;
3448   for (i=0; i<m; i++) {
3449     for (j=0; j<ourlens[i]; j++) {
3450       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3451       jj++;
3452     }
3453   }
3454 
3455   for (i=0; i<m; i++) {
3456     ourlens[i] -= offlens[i];
3457   }
3458   if (!sizesset) {
3459     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3460   }
3461   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3462 
3463   for (i=0; i<m; i++) {
3464     ourlens[i] += offlens[i];
3465   }
3466 
3467   if (!rank) {
3468     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3469 
3470     /* read in my part of the matrix numerical values  */
3471     nz   = procsnz[0];
3472     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3473 
3474     /* insert into matrix */
3475     jj      = rstart;
3476     smycols = mycols;
3477     svals   = vals;
3478     for (i=0; i<m; i++) {
3479       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3480       smycols += ourlens[i];
3481       svals   += ourlens[i];
3482       jj++;
3483     }
3484 
3485     /* read in other processors and ship out */
3486     for (i=1; i<size; i++) {
3487       nz     = procsnz[i];
3488       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3489       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3492   } else {
3493     /* receive numeric values */
3494     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3495 
3496     /* receive message of values*/
3497     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3498 
3499     /* insert into matrix */
3500     jj      = rstart;
3501     smycols = mycols;
3502     svals   = vals;
3503     for (i=0; i<m; i++) {
3504       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3505       smycols += ourlens[i];
3506       svals   += ourlens[i];
3507       jj++;
3508     }
3509   }
3510   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3511   ierr = PetscFree(vals);CHKERRQ(ierr);
3512   ierr = PetscFree(mycols);CHKERRQ(ierr);
3513   ierr = PetscFree(rowners);CHKERRQ(ierr);
3514   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3515   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3521 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3522 {
3523   PetscErrorCode ierr;
3524   IS             iscol_local;
3525   PetscInt       csize;
3526 
3527   PetscFunctionBegin;
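  /* Gather the requested column index set onto every process once; for MAT_INITIAL_MATRIX the
     gathered IS is composed on the new matrix under the name "ISAllGather" so that a later
     MAT_REUSE_MATRIX call can retrieve it instead of gathering again */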
3528   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3529   if (call == MAT_REUSE_MATRIX) {
3530     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3531     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3532   } else {
3533     PetscInt cbs;
3534     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3535     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3536     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3537   }
3538   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3539   if (call == MAT_INITIAL_MATRIX) {
3540     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3541     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3548 /*
3549     Not great since it makes two copies of the submatrix: first a SeqAIJ
3550   copy of the local part, and then the end result formed by concatenating the local matrices.
3551   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3552 
3553   Note: This requires a sequential iscol with all indices.
3554 */
3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   PetscMPIInt    rank,size;
3559   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3560   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3561   Mat            *local,M,Mreuse;
3562   MatScalar      *vwork,*aa;
3563   MPI_Comm       comm = ((PetscObject)mat)->comm;
3564   Mat_SeqAIJ     *aij;
3565 
3566 
3567   PetscFunctionBegin;
3568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3569   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3570 
3571   if (call ==  MAT_REUSE_MATRIX) {
3572     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3573     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3574     local = &Mreuse;
3575     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3576   } else {
3577     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3578     Mreuse = *local;
3579     ierr   = PetscFree(local);CHKERRQ(ierr);
3580   }
3581 
3582   /*
3583       m - number of local rows
3584       n - number of columns (same on all processors)
3585       rstart - first row in new global matrix generated
3586   */
3587   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3588   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3589   if (call == MAT_INITIAL_MATRIX) {
3590     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3591     ii  = aij->i;
3592     jj  = aij->j;
3593 
3594     /*
3595         Determine the number of non-zeros in the diagonal and off-diagonal
3596         portions of the matrix in order to do correct preallocation
3597     */
3598 
3599     /* first get start and end of "diagonal" columns */
3600     if (csize == PETSC_DECIDE) {
3601       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3602       if (mglobal == n) { /* square matrix */
3603 	nlocal = m;
3604       } else {
3605         nlocal = n/size + ((n % size) > rank);
3606       }
3607     } else {
3608       nlocal = csize;
3609     }
3610     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3611     rstart = rend - nlocal;
3612     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3613 
3614     /* next, compute all the lengths */
3615     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3616     olens = dlens + m;
3617     for (i=0; i<m; i++) {
3618       jend = ii[i+1] - ii[i];
3619       olen = 0;
3620       dlen = 0;
3621       for (j=0; j<jend; j++) {
3622         if (*jj < rstart || *jj >= rend) olen++;
3623         else dlen++;
3624         jj++;
3625       }
3626       olens[i] = olen;
3627       dlens[i] = dlen;
3628     }
3629     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3630     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3631     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3632     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3633     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3634     ierr = PetscFree(dlens);CHKERRQ(ierr);
3635   } else {
3636     PetscInt ml,nl;
3637 
3638     M = *newmat;
3639     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3640     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3641     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3642     /*
3643          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3644        rather than the slower MatSetValues().
3645     */
3646     M->was_assembled = PETSC_TRUE;
3647     M->assembled     = PETSC_FALSE;
3648   }
3649   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3650   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3651   ii  = aij->i;
3652   jj  = aij->j;
3653   aa  = aij->a;
3654   for (i=0; i<m; i++) {
3655     row   = rstart + i;
3656     nz    = ii[i+1] - ii[i];
3657     cwork = jj;     jj += nz;
3658     vwork = aa;     aa += nz;
3659     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3660   }
3661 
3662   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3663   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3664   *newmat = M;
3665 
3666   /* save submatrix used in processor for next request */
3667   if (call ==  MAT_INITIAL_MATRIX) {
3668     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3669     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3670   }
3671 
3672   PetscFunctionReturn(0);
3673 }
3674 
3675 EXTERN_C_BEGIN
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3678 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3679 {
3680   PetscInt       m,cstart, cend,j,nnz,i,d;
3681   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3682   const PetscInt *JJ;
3683   PetscScalar    *values;
3684   PetscErrorCode ierr;
3685 
3686   PetscFunctionBegin;
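  /* Two passes over the user's CSR arrays: first count, for each local row, how many column indices
     fall inside [cstart,cend) to build d_nnz/o_nnz for preallocation, then insert the rows with
     MatSetValues_MPIAIJ() (zeros are inserted when no values are supplied) */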
3687   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3688 
3689   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3690   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3691   m      = B->rmap->n;
3692   cstart = B->cmap->rstart;
3693   cend   = B->cmap->rend;
3694   rstart = B->rmap->rstart;
3695 
3696   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3697 
3698 #if defined(PETSC_USE_DEBUG)
3699   for (i=0; i<m; i++) {
3700     nnz     = Ii[i+1]- Ii[i];
3701     JJ      = J + Ii[i];
3702     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3703     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3704     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3705   }
3706 #endif
3707 
3708   for (i=0; i<m; i++) {
3709     nnz     = Ii[i+1]- Ii[i];
3710     JJ      = J + Ii[i];
3711     nnz_max = PetscMax(nnz_max,nnz);
3712     d       = 0;
3713     for (j=0; j<nnz; j++) {
3714       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3715     }
3716     d_nnz[i] = d;
3717     o_nnz[i] = nnz - d;
3718   }
3719   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3720   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3721 
3722   if (v) values = (PetscScalar*)v;
3723   else {
3724     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3725     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3726   }
3727 
3728   for (i=0; i<m; i++) {
3729     ii   = i + rstart;
3730     nnz  = Ii[i+1]- Ii[i];
3731     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3732   }
3733   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735 
3736   if (!v) {
3737     ierr = PetscFree(values);CHKERRQ(ierr);
3738   }
3739   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3740   PetscFunctionReturn(0);
3741 }
3742 EXTERN_C_END
3743 
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3746 /*@
3747    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3748    (the default parallel PETSc format).
3749 
3750    Collective on MPI_Comm
3751 
3752    Input Parameters:
3753 +  B - the matrix
3754 .  i - the indices into j for the start of each local row (starts with zero)
3755 .  j - the column indices for each local row (starts with zero)
3756 -  v - optional values in the matrix
3757 
3758    Level: developer
3759 
3760    Notes:
3761        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3762      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3763      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3764 
3765        The i and j indices are 0 based, and the i array holds offsets into the local j array.
3766 
3767        The format used for the sparse matrix input is equivalent to a
3768     row-major ordering, i.e., for the following matrix the expected input data is
3769     as shown:
3770 
3771         1 0 0
3772         2 0 3     P0
3773        -------
3774         4 5 6     P1
3775 
3776      Process0 [P0]: rows_owned=[0,1]
3777         i =  {0,1,3}  [size = nrow+1  = 2+1]
3778         j =  {0,0,2}  [size = nz = 3]
3779         v =  {1,2,3}  [size = nz = 3]
3780 
3781      Process1 [P1]: rows_owned=[2]
3782         i =  {0,3}    [size = nrow+1  = 1+1]
3783         j =  {0,1,2}  [size = nz = 3]
3784         v =  {4,5,6}  [size = nz = 3]
3785 
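     A minimal calling sketch (not part of the original example; the local CSR arrays i, j, v and the
     local sizes m, n stand for whatever the application has already built; v may be PETSC_NULL, and
     the matrix is assembled when this routine returns):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
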
3786 .keywords: matrix, aij, compressed row, sparse, parallel
3787 
3788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3789           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3790 @*/
3791 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3792 {
3793   PetscErrorCode ierr;
3794 
3795   PetscFunctionBegin;
3796   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 #undef __FUNCT__
3801 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3802 /*@C
3803    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3804    (the default parallel PETSc format).  For good matrix assembly performance
3805    the user should preallocate the matrix storage by setting the parameters
3806    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3807    performance can be increased by more than a factor of 50.
3808 
3809    Collective on MPI_Comm
3810 
3811    Input Parameters:
3812 +  A - the matrix
3813 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3814            (same value is used for all local rows)
3815 .  d_nnz - array containing the number of nonzeros in the various rows of the
3816            DIAGONAL portion of the local submatrix (possibly different for each row)
3817            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3818            The size of this array is equal to the number of local rows, i.e 'm'.
3819            For matrices that will be factored, you must leave room for (and set)
3820            the diagonal entry even if it is zero.
3821 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3822            submatrix (same value is used for all local rows).
3823 -  o_nnz - array containing the number of nonzeros in the various rows of the
3824            OFF-DIAGONAL portion of the local submatrix (possibly different for
3825            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3826            structure. The size of this array is equal to the number
3827            of local rows, i.e 'm'.
3828 
3829    If the *_nnz parameter is given then the *_nz parameter is ignored
3830 
3831    The AIJ format (also called the Yale sparse matrix format or
3832    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3833    storage.  The stored row and column indices begin with zero.
3834    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3835 
3836    The parallel matrix is partitioned such that the first m0 rows belong to
3837    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3838    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3839 
3840    The DIAGONAL portion of the local submatrix of a processor can be defined
3841    as the submatrix which is obtained by extracting the part corresponding to
3842    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3843    first row that belongs to the processor, r2 is the last row belonging to
3844    this processor, and c1-c2 is the range of indices of the local part of a
3845    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3846    common case of a square matrix, the row and column ranges are the same and
3847    the DIAGONAL part is also square. The remaining portion of the local
3848    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3849 
3850    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3851 
3852    You can call MatGetInfo() to get information on how effective the preallocation was;
3853    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3854    You can also run with the option -info and look for messages with the string
3855    malloc in them to see if additional memory allocation was needed.
3856 
3857    Example usage:
3858 
3859    Consider the following 8x8 matrix with 34 nonzero values, which is
3860    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3861    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3862    as follows:
3863 
3864 .vb
3865             1  2  0  |  0  3  0  |  0  4
3866     Proc0   0  5  6  |  7  0  0  |  8  0
3867             9  0 10  | 11  0  0  | 12  0
3868     -------------------------------------
3869            13  0 14  | 15 16 17  |  0  0
3870     Proc1   0 18  0  | 19 20 21  |  0  0
3871             0  0  0  | 22 23  0  | 24  0
3872     -------------------------------------
3873     Proc2  25 26 27  |  0  0 28  | 29  0
3874            30  0  0  | 31 32 33  |  0 34
3875 .ve
3876 
3877    This can be represented as a collection of submatrices as:
3878 
3879 .vb
3880       A B C
3881       D E F
3882       G H I
3883 .ve
3884 
3885    Where the submatrices A,B,C are owned by proc0, D,E,F are
3886    owned by proc1, G,H,I are owned by proc2.
3887 
3888    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3889    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3890    The 'M','N' parameters are 8,8, and have the same values on all procs.
3891 
3892    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3893    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3894    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3895    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3896    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3897    matrix, and [DF] as another SeqAIJ matrix.
3898 
3899    When d_nz, o_nz parameters are specified, d_nz storage elements are
3900    allocated for every row of the local diagonal submatrix, and o_nz
3901    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3902    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3903    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3904    In this case, the values of d_nz,o_nz are:
3905 .vb
3906      proc0 : dnz = 2, o_nz = 2
3907      proc1 : dnz = 3, o_nz = 2
3908      proc2 : dnz = 1, o_nz = 4
3909 .ve
3910    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3911    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3912    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3913    34 values.
3914 
3915    When d_nnz, o_nnz parameters are specified, the storage is specified
3916    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3917    In the above case the values for d_nnz,o_nnz are:
3918 .vb
3919      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3920      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3921      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3922 .ve
3923    Here the space allocated is the sum of all the above values, i.e. 34, and
3924    hence pre-allocation is perfect.
3925 
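   As a sketch of how proc0 of the example above could be set up (d_nnz and o_nnz are the proc0 values
   listed above; the MatCreate()/MatSetSizes()/MatSetType() calls are shown only for context):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
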
3926    Level: intermediate
3927 
3928 .keywords: matrix, aij, compressed row, sparse, parallel
3929 
3930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3931           MPIAIJ, MatGetInfo()
3932 @*/
3933 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3934 {
3935   PetscErrorCode ierr;
3936 
3937   PetscFunctionBegin;
3938   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3939   PetscValidType(B,1);
3940   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3941   PetscFunctionReturn(0);
3942 }
3943 
3944 #undef __FUNCT__
3945 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3946 /*@
3947      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3948          CSR format the local rows.
3949 
3950    Collective on MPI_Comm
3951 
3952    Input Parameters:
3953 +  comm - MPI communicator
3954 .  m - number of local rows (Cannot be PETSC_DECIDE)
3955 .  n - This value should be the same as the local size used in creating the
3956        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3957        calculated if N is given) For square matrices n is almost always m.
3958 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3959 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3960 .   i - row indices
3961 .   j - column indices
3962 -   a - matrix values
3963 
3964    Output Parameter:
3965 .   mat - the matrix
3966 
3967    Level: intermediate
3968 
3969    Notes:
3970        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3971      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3972      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3973 
3974        The i and j indices are 0 based, and the i array holds offsets into the local j array.
3975 
3976        The format used for the sparse matrix input is equivalent to a
3977     row-major ordering, i.e., for the following matrix the expected input data is
3978     as shown:
3979 
3980         1 0 0
3981         2 0 3     P0
3982        -------
3983         4 5 6     P1
3984 
3985      Process0 [P0]: rows_owned=[0,1]
3986         i =  {0,1,3}  [size = nrow+1  = 2+1]
3987         j =  {0,0,2}  [size = nz = 3]
3988         v =  {1,2,3}  [size = nz = 3]
3989 
3990      Process1 [P1]: rows_owned=[2]
3991         i =  {0,3}    [size = nrow+1  = 1+1]
3992         j =  {0,1,2}  [size = nz = 3]
3993         v =  {4,5,6}  [size = nz = 3]
3994 
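     A minimal calling sketch for process 0 of the example above (array values copied from the listing;
     comm is a placeholder for the application's communicator):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
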
3995 .keywords: matrix, aij, compressed row, sparse, parallel
3996 
3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3998           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3999 @*/
4000 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4001 {
4002   PetscErrorCode ierr;
4003 
4004   PetscFunctionBegin;
4005   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4006   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4007   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4008   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4009   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4010   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4011   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014 
4015 #undef __FUNCT__
4016 #define __FUNCT__ "MatCreateAIJ"
4017 /*@C
4018    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4019    (the default parallel PETSc format).  For good matrix assembly performance
4020    the user should preallocate the matrix storage by setting the parameters
4021    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4022    performance can be increased by more than a factor of 50.
4023 
4024    Collective on MPI_Comm
4025 
4026    Input Parameters:
4027 +  comm - MPI communicator
4028 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4029            This value should be the same as the local size used in creating the
4030            y vector for the matrix-vector product y = Ax.
4031 .  n - This value should be the same as the local size used in creating the
4032        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4033        calculated if N is given) For square matrices n is almost always m.
4034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4036 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4037            (same value is used for all local rows)
4038 .  d_nnz - array containing the number of nonzeros in the various rows of the
4039            DIAGONAL portion of the local submatrix (possibly different for each row)
4040            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4041            The size of this array is equal to the number of local rows, i.e 'm'.
4042 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4043            submatrix (same value is used for all local rows).
4044 -  o_nnz - array containing the number of nonzeros in the various rows of the
4045            OFF-DIAGONAL portion of the local submatrix (possibly different for
4046            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4047            structure. The size of this array is equal to the number
4048            of local rows, i.e 'm'.
4049 
4050    Output Parameter:
4051 .  A - the matrix
4052 
4053    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4054    MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
4055    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
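
   For illustration only, one such sequence (with the arguments named as in the list above) is
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve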
4056 
4057    Notes:
4058    If the *_nnz parameter is given then the *_nz parameter is ignored
4059 
4060    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4061    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4062    storage requirements for this matrix.
4063 
4064    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4065    processor then it must be used on all processors that share the object for
4066    that argument.
4067 
4068    The user MUST specify either the local or global matrix dimensions
4069    (possibly both).
4070 
4071    The parallel matrix is partitioned across processors such that the
4072    first m0 rows belong to process 0, the next m1 rows belong to
4073    process 1, the next m2 rows belong to process 2, etc., where
4074    m0,m1,m2,... are the input parameter 'm' on each process, i.e. each processor stores
4075    values corresponding to an [m x N] submatrix.
4076 
4077    The columns are logically partitioned with the n0 columns belonging
4078    to the 0th partition, the next n1 columns belonging to the next
4079    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4080 
4081    The DIAGONAL portion of the local submatrix on any given processor
4082    is the submatrix corresponding to the rows m and columns n owned by
4083    that processor, i.e. the diagonal block on
4084    process 0 is [m0 x n0], the diagonal block on process 1 is [m1 x n1],
4085    etc. The remaining portion of the local submatrix, of size [m x (N-n)],
4086    constitutes the OFF-DIAGONAL portion. The example below
4087    illustrates this concept.
4088 
4089    For a square global matrix we define each processor's diagonal portion
4090    to be its local rows and the corresponding columns (a square submatrix);
4091    each processor's off-diagonal portion encompasses the remainder of the
4092    local matrix (a rectangular submatrix).
4093 
4094    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4095 
4096    When calling this routine with a single process communicator, a matrix of
4097    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4098    type of communicator, use the construction mechanism:
4099      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4100 
4101    By default, this format uses inodes (identical nodes) when possible.
4102    We search for consecutive rows with the same nonzero structure, thereby
4103    reusing matrix information to achieve increased efficiency.
4104 
4105    Options Database Keys:
4106 +  -mat_no_inode  - Do not use inodes
4107 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4108 -  -mat_aij_oneindex - Internally use indexing starting at 1
4109         rather than 0.  Note that when calling MatSetValues(),
4110         the user still MUST index entries starting at 0!
4111 
4112 
4113    Example usage:
4114 
4115    Consider the following 8x8 matrix with 34 non-zero values, which is
4116    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4117    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4118    as follows:
4119 
4120 .vb
4121             1  2  0  |  0  3  0  |  0  4
4122     Proc0   0  5  6  |  7  0  0  |  8  0
4123             9  0 10  | 11  0  0  | 12  0
4124     -------------------------------------
4125            13  0 14  | 15 16 17  |  0  0
4126     Proc1   0 18  0  | 19 20 21  |  0  0
4127             0  0  0  | 22 23  0  | 24  0
4128     -------------------------------------
4129     Proc2  25 26 27  |  0  0 28  | 29  0
4130            30  0  0  | 31 32 33  |  0 34
4131 .ve
4132 
4133    This can be represented as a collection of submatrices as:
4134 
4135 .vb
4136       A B C
4137       D E F
4138       G H I
4139 .ve
4140 
4141    Where the submatrices A,B,C are owned by proc0, D,E,F are
4142    owned by proc1, G,H,I are owned by proc2.
4143 
4144    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4145    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4146    The 'M','N' parameters are 8,8, and have the same values on all procs.
4147 
4148    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4149    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4150    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4151    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4152    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4153    matrix, and [DF] as another SeqAIJ matrix.
4154 
4155    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4156    allocated for every row of the local DIAGONAL submatrix, and o_nz
4157    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4158    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4159    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4160    In this case, the values of d_nz,o_nz are:
4161 .vb
4162      proc0 : d_nz = 2, o_nz = 2
4163      proc1 : d_nz = 3, o_nz = 2
4164      proc2 : d_nz = 1, o_nz = 4
4165 .ve
4166    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4167    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4168    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4169    34 values.
4170 
4171    When the d_nnz, o_nnz parameters are specified, the storage is specified
4172    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4173    In the above case the values for d_nnz,o_nnz are:
4174 .vb
4175      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4176      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4177      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4178 .ve
4179    Here the space allocated is the sum of all the above values, i.e. 34, so
4180    the preallocation is exact.
4181 
4182    Level: intermediate
4183 
4184 .keywords: matrix, aij, compressed row, sparse, parallel
4185 
4186 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4187           MPIAIJ, MatCreateMPIAIJWithArrays()
4188 @*/
4189 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4190 {
4191   PetscErrorCode ierr;
4192   PetscMPIInt    size;
4193 
4194   PetscFunctionBegin;
4195   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4196   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4198   if (size > 1) {
4199     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4200     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4201   } else {
4202     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4203     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4204   }
4205   PetscFunctionReturn(0);
4206 }
4207 
4208 #undef __FUNCT__
4209 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
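/*
   MatMPIAIJGetSeqAIJ - gives access to the pieces that make up the local part of an MPIAIJ matrix:
   Ad is the "diagonal" SeqAIJ block, Ao is the "off-diagonal" SeqAIJ block, and colmap maps the
   (compressed) local column indices of Ao to global column indices.  These are references to the
   matrix's internal data, not copies, and must not be destroyed or freed by the caller.
*/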
4210 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4211 {
4212   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4213 
4214   PetscFunctionBegin;
4215   *Ad     = a->A;
4216   *Ao     = a->B;
4217   *colmap = a->garray;
4218   PetscFunctionReturn(0);
4219 }
4220 
4221 #undef __FUNCT__
4222 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4223 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4224 {
4225   PetscErrorCode ierr;
4226   PetscInt       i;
4227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4228 
4229   PetscFunctionBegin;
4230   if (coloring->ctype == IS_COLORING_GLOBAL) {
4231     ISColoringValue *allcolors,*colors;
4232     ISColoring      ocoloring;
4233 
4234     /* set coloring for diagonal portion */
4235     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4236 
4237     /* set coloring for off-diagonal portion */
4238     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4239     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4240     for (i=0; i<a->B->cmap->n; i++) {
4241       colors[i] = allcolors[a->garray[i]];
4242     }
4243     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4244     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4245     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4246     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4247   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4248     ISColoringValue *colors;
4249     PetscInt        *larray;
4250     ISColoring      ocoloring;
4251 
4252     /* set coloring for diagonal portion */
4253     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4254     for (i=0; i<a->A->cmap->n; i++) {
4255       larray[i] = i + A->cmap->rstart;
4256     }
4257     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4258     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4259     for (i=0; i<a->A->cmap->n; i++) {
4260       colors[i] = coloring->colors[larray[i]];
4261     }
4262     ierr = PetscFree(larray);CHKERRQ(ierr);
4263     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4264     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4265     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4266 
4267     /* set coloring for off-diagonal portion */
4268     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4269     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4270     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4271     for (i=0; i<a->B->cmap->n; i++) {
4272       colors[i] = coloring->colors[larray[i]];
4273     }
4274     ierr = PetscFree(larray);CHKERRQ(ierr);
4275     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4276     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4277     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4278   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4279 
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 #if defined(PETSC_HAVE_ADIC)
4284 #undef __FUNCT__
4285 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4286 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4287 {
4288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4289   PetscErrorCode ierr;
4290 
4291   PetscFunctionBegin;
4292   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4293   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4294   PetscFunctionReturn(0);
4295 }
4296 #endif
4297 
4298 #undef __FUNCT__
4299 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4300 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4301 {
4302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4303   PetscErrorCode ierr;
4304 
4305   PetscFunctionBegin;
4306   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4307   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4313 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4314 {
4315   PetscErrorCode ierr;
4316   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4317   PetscInt       *indx;
4318 
4319   PetscFunctionBegin;
4320   /* This routine will ONLY return MPIAIJ type matrix */
4321   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4322   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4323   if (n == PETSC_DECIDE){
4324     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4325   }
4326   /* Check sum(n) = N */
4327   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4328   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4329 
4330   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4331   rstart -= m;
4332 
4333   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4334   for (i=0;i<m;i++) {
4335     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4336     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4337     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4338   }
4339 
4340   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4341   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4342   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4343   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4344   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4345   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4346   PetscFunctionReturn(0);
4347 }
4348 
4349 #undef __FUNCT__
4350 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4351 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4352 {
4353   PetscErrorCode ierr;
4354   PetscInt       m,N,i,rstart,nnz,Ii;
4355   PetscInt       *indx;
4356   PetscScalar    *values;
4357 
4358   PetscFunctionBegin;
4359   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4360   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4361   for (i=0;i<m;i++) {
4362     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4363     Ii    = i + rstart;
4364     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4365     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4366   }
4367   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4368   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4374 /*@
4375       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4376                  matrices from each processor
4377 
4378     Collective on MPI_Comm
4379 
4380    Input Parameters:
4381 +    comm - the communicator the parallel matrix will live on
4382 .    inmat - the input sequential matrix (one per process)
4383 .    n - number of local columns (or PETSC_DECIDE)
4384 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4385 
4386    Output Parameter:
4387 .    outmat - the parallel matrix generated
4388 
4389     Level: advanced
4390 
4391    Notes: The number of columns of the matrix on EACH process MUST be the same; a sample call is sketched below.
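
   For illustration only, assuming each process holds its own SeqAIJ matrix Aseq (a hypothetical
   name) with the same number of columns, the concatenated parallel matrix could be built as
.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve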
4392 
4393 @*/
4394 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4395 {
4396   PetscErrorCode ierr;
4397 
4398   PetscFunctionBegin;
4399   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4400   if (scall == MAT_INITIAL_MATRIX){
4401     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4402   }
4403   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4404   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatFileSplit"
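/*
   MatFileSplit - each process copies its locally owned rows of A into a SeqAIJ matrix (with the
   full global number of columns) and appends that matrix, in PETSc binary format, to a separate
   file named <outfile>.<rank>.
*/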
4410 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4411 {
4412   PetscErrorCode    ierr;
4413   PetscMPIInt       rank;
4414   PetscInt          m,N,i,rstart,nnz;
4415   size_t            len;
4416   const PetscInt    *indx;
4417   PetscViewer       out;
4418   char              *name;
4419   Mat               B;
4420   const PetscScalar *values;
4421 
4422   PetscFunctionBegin;
4423   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4424   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4425   /* Should this be the type of the diagonal block of A? */
4426   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4427   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4428   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4429   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4430   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4431   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4432   for (i=0;i<m;i++) {
4433     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4434     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4435     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4436   }
4437   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4438   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4439 
4440   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4441   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4442   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4443   sprintf(name,"%s.%d",outfile,rank);
4444   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4445   ierr = PetscFree(name);CHKERRQ(ierr);
4446   ierr = MatView(B,out);CHKERRQ(ierr);
4447   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4448   ierr = MatDestroy(&B);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4455 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4456 {
4457   PetscErrorCode       ierr;
4458   Mat_Merge_SeqsToMPI  *merge;
4459   PetscContainer       container;
4460 
4461   PetscFunctionBegin;
4462   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4463   if (container) {
4464     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4477     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4478     ierr = PetscFree(merge);CHKERRQ(ierr);
4479     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4480   }
4481   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4482   PetscFunctionReturn(0);
4483 }
4484 
4485 #include <../src/mat/utils/freespace.h>
4486 #include <petscbt.h>
4487 
4488 #undef __FUNCT__
4489 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4490 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4491 {
4492   PetscErrorCode       ierr;
4493   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4494   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4495   PetscMPIInt          size,rank,taga,*len_s;
4496   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4497   PetscInt             proc,m;
4498   PetscInt             **buf_ri,**buf_rj;
4499   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4500   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4501   MPI_Request          *s_waits,*r_waits;
4502   MPI_Status           *status;
4503   MatScalar            *aa=a->a;
4504   MatScalar            **abuf_r,*ba_i;
4505   Mat_Merge_SeqsToMPI  *merge;
4506   PetscContainer       container;
4507 
4508   PetscFunctionBegin;
4509   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4510 
4511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4513 
4514   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4515   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4516 
4517   bi     = merge->bi;
4518   bj     = merge->bj;
4519   buf_ri = merge->buf_ri;
4520   buf_rj = merge->buf_rj;
4521 
4522   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4523   owners = merge->rowmap->range;
4524   len_s  = merge->len_s;
4525 
4526   /* send and recv matrix values */
4527   /*-----------------------------*/
4528   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4529   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4530 
4531   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4532   for (proc=0,k=0; proc<size; proc++){
4533     if (!len_s[proc]) continue;
4534     i = owners[proc];
4535     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4536     k++;
4537   }
4538 
4539   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4540   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4541   ierr = PetscFree(status);CHKERRQ(ierr);
4542 
4543   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4544   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4545 
4546   /* insert mat values of mpimat */
4547   /*----------------------------*/
4548   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4549   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4550 
4551   for (k=0; k<merge->nrecv; k++){
4552     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4553     nrows = *(buf_ri_k[k]);
4554     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4555     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4556   }
4557 
4558   /* set values of ba */
4559   m = merge->rowmap->n;
4560   for (i=0; i<m; i++) {
4561     arow = owners[rank] + i;
4562     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4563     bnzi = bi[i+1] - bi[i];
4564     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4565 
4566     /* add local non-zero vals of this proc's seqmat into ba */
4567     anzi = ai[arow+1] - ai[arow];
4568     aj   = a->j + ai[arow];
4569     aa   = a->a + ai[arow];
4570     nextaj = 0;
4571     for (j=0; nextaj<anzi; j++){
4572       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4573         ba_i[j] += aa[nextaj++];
4574       }
4575     }
4576 
4577     /* add received vals into ba */
4578     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4579       /* i-th row */
4580       if (i == *nextrow[k]) {
4581         anzi = *(nextai[k]+1) - *nextai[k];
4582         aj   = buf_rj[k] + *(nextai[k]);
4583         aa   = abuf_r[k] + *(nextai[k]);
4584         nextaj = 0;
4585         for (j=0; nextaj<anzi; j++){
4586           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4587             ba_i[j] += aa[nextaj++];
4588           }
4589         }
4590         nextrow[k]++; nextai[k]++;
4591       }
4592     }
4593     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4594   }
4595   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597 
4598   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4599   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4600   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4601   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4602   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4607 
4608 #undef __FUNCT__
4609 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4610 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4611 {
4612   PetscErrorCode       ierr;
4613   Mat                  B_mpi;
4614   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4615   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4616   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4617   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4618   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4619   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4620   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4621   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4622   MPI_Status           *status;
4623   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4624   PetscBT              lnkbt;
4625   Mat_Merge_SeqsToMPI  *merge;
4626   PetscContainer       container;
4627 
4628   PetscFunctionBegin;
4629   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4630 
4631   /* make sure it is a PETSc comm */
4632   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4633   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4634   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4635 
4636   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4637   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4638 
4639   /* determine row ownership */
4640   /*---------------------------------------------------------*/
4641   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4643   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4644   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4645   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4646   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4647   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4648 
4649   m      = merge->rowmap->n;
4650   M      = merge->rowmap->N;
4651   owners = merge->rowmap->range;
4652 
4653   /* determine the number of messages to send, their lengths */
4654   /*---------------------------------------------------------*/
4655   len_s  = merge->len_s;
4656 
4657   len = 0;  /* length of buf_si[] */
4658   merge->nsend = 0;
4659   for (proc=0; proc<size; proc++){
4660     len_si[proc] = 0;
4661     if (proc == rank){
4662       len_s[proc] = 0;
4663     } else {
4664       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4665       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4666     }
4667     if (len_s[proc]) {
4668       merge->nsend++;
4669       nrows = 0;
4670       for (i=owners[proc]; i<owners[proc+1]; i++){
4671         if (ai[i+1] > ai[i]) nrows++;
4672       }
4673       len_si[proc] = 2*(nrows+1);
4674       len += len_si[proc];
4675     }
4676   }
4677 
4678   /* determine the number and length of messages to receive for ij-structure */
4679   /*-------------------------------------------------------------------------*/
4680   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4681   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4682 
4683   /* post the Irecv of j-structure */
4684   /*-------------------------------*/
4685   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4686   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4687 
4688   /* post the Isend of j-structure */
4689   /*--------------------------------*/
4690   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4691 
4692   for (proc=0, k=0; proc<size; proc++){
4693     if (!len_s[proc]) continue;
4694     i = owners[proc];
4695     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4696     k++;
4697   }
4698 
4699   /* receives and sends of j-structure are complete */
4700   /*------------------------------------------------*/
4701   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4702   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4703 
4704   /* send and recv i-structure */
4705   /*---------------------------*/
4706   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4707   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4708 
4709   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4710   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4711   for (proc=0,k=0; proc<size; proc++){
4712     if (!len_s[proc]) continue;
4713     /* form outgoing message for i-structure:
4714          buf_si[0]:                 nrows to be sent
4715                [1:nrows]:           row index (global)
4716                [nrows+1:2*nrows+1]: i-structure index
4717     */
4718     /*-------------------------------------------*/
4719     nrows = len_si[proc]/2 - 1;
4720     buf_si_i    = buf_si + nrows+1;
4721     buf_si[0]   = nrows;
4722     buf_si_i[0] = 0;
4723     nrows = 0;
4724     for (i=owners[proc]; i<owners[proc+1]; i++){
4725       anzi = ai[i+1] - ai[i];
4726       if (anzi) {
4727         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4728         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4729         nrows++;
4730       }
4731     }
4732     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4733     k++;
4734     buf_si += len_si[proc];
4735   }
4736 
4737   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4738   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4739 
4740   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4741   for (i=0; i<merge->nrecv; i++){
4742     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4743   }
4744 
4745   ierr = PetscFree(len_si);CHKERRQ(ierr);
4746   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4747   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4748   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4749   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4750   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4751   ierr = PetscFree(status);CHKERRQ(ierr);
4752 
4753   /* compute a local seq matrix in each processor */
4754   /*----------------------------------------------*/
4755   /* allocate bi array and free space for accumulating nonzero column info */
4756   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4757   bi[0] = 0;
4758 
4759   /* create and initialize a linked list */
4760   nlnk = N+1;
4761   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762 
4763   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4764   len  = ai[owners[rank+1]] - ai[owners[rank]];
4766   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4767   current_space = free_space;
4768 
4769   /* determine symbolic info for each local row */
4770   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4771 
4772   for (k=0; k<merge->nrecv; k++){
4773     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4774     nrows = *buf_ri_k[k];
4775     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4776     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4777   }
4778 
4779   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4780   len = 0;
4781   for (i=0;i<m;i++) {
4782     bnzi   = 0;
4783     /* add local non-zero cols of this proc's seqmat into lnk */
4784     arow   = owners[rank] + i;
4785     anzi   = ai[arow+1] - ai[arow];
4786     aj     = a->j + ai[arow];
4787     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4788     bnzi += nlnk;
4789     /* add received col data into lnk */
4790     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4791       if (i == *nextrow[k]) { /* i-th row */
4792         anzi = *(nextai[k]+1) - *nextai[k];
4793         aj   = buf_rj[k] + *nextai[k];
4794         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4795         bnzi += nlnk;
4796         nextrow[k]++; nextai[k]++;
4797       }
4798     }
4799     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4800 
4801     /* if free space is not available, make more free space */
4802     if (current_space->local_remaining<bnzi) {
4803       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4804       nspacedouble++;
4805     }
4806     /* copy data into free space, then initialize lnk */
4807     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4808     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4809 
4810     current_space->array           += bnzi;
4811     current_space->local_used      += bnzi;
4812     current_space->local_remaining -= bnzi;
4813 
4814     bi[i+1] = bi[i] + bnzi;
4815   }
4816 
4817   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4818 
4819   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4820   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4821   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4822 
4823   /* create symbolic parallel matrix B_mpi */
4824   /*---------------------------------------*/
4825   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4826   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4827   if (n==PETSC_DECIDE) {
4828     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4829   } else {
4830     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4831   }
4832   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4833   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4834   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4835   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4836   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4837 
4838   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4839   B_mpi->assembled     = PETSC_FALSE;
4840   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4841   merge->bi            = bi;
4842   merge->bj            = bj;
4843   merge->buf_ri        = buf_ri;
4844   merge->buf_rj        = buf_rj;
4845   merge->coi           = PETSC_NULL;
4846   merge->coj           = PETSC_NULL;
4847   merge->owners_co     = PETSC_NULL;
4848 
4849   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4850 
4851   /* attach the supporting struct to B_mpi for reuse */
4852   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4853   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4854   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4855   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4856   *mpimat = B_mpi;
4857 
4858   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4859   PetscFunctionReturn(0);
4860 }
4861 
4862 #undef __FUNCT__
4863 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4864 /*@C
4865       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4866                  matrices from each processor
4867 
4868     Collective on MPI_Comm
4869 
4870    Input Parameters:
4871 +    comm - the communicator the parallel matrix will live on
4872 .    seqmat - the input sequential matrix (one per process)
4873 .    m - number of local rows (or PETSC_DECIDE)
4874 .    n - number of local columns (or PETSC_DECIDE)
4875 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4876 
4877    Output Parameter:
4878 .    mpimat - the parallel matrix generated
4879 
4880     Level: advanced
4881 
4882    Notes:
4883      The dimensions of the sequential matrix on each process MUST be the same.
4884      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4885      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
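
     For illustration only, assuming each process holds a SeqAIJ matrix Aseq (a hypothetical name)
     of identical dimensions, the sum can be formed and later refreshed with new numerical values as
.vb
     Mat Ampi;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     /* ... change the numerical values of Aseq, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
.ve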
4886 @*/
4887 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4888 {
4889   PetscErrorCode   ierr;
4890   PetscMPIInt     size;
4891 
4892   PetscFunctionBegin;
4893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4894   if (size == 1){
4895      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4896      if (scall == MAT_INITIAL_MATRIX){
4897        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4898      } else {
4899        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4900      }
4901      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4902      PetscFunctionReturn(0);
4903   }
4904   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4905   if (scall == MAT_INITIAL_MATRIX){
4906     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4907   }
4908   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4909   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910   PetscFunctionReturn(0);
4911 }
4912 
4913 #undef __FUNCT__
4914 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4915 /*@
4916      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4917           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4918           with MatGetSize()
4919 
4920     Not Collective
4921 
4922    Input Parameters:
4923 +    A - the matrix
4924 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4925 
4926    Output Parameter:
4927 .    A_loc - the local sequential matrix generated
4928 
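   Notes:
     For illustration only, a typical call sequence might be (with A an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
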
4929     Level: developer
4930 
4931 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4932 
4933 @*/
4934 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4935 {
4936   PetscErrorCode  ierr;
4937   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4938   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4939   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4940   MatScalar       *aa=a->a,*ba=b->a,*cam;
4941   PetscScalar     *ca;
4942   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4943   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4944   PetscBool       match;
4945 
4946   PetscFunctionBegin;
4947   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4948   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4949   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4950   if (scall == MAT_INITIAL_MATRIX){
4951     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4952     ci[0] = 0;
4953     for (i=0; i<am; i++){
4954       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4955     }
4956     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4957     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4958     k = 0;
4959     for (i=0; i<am; i++) {
4960       ncols_o = bi[i+1] - bi[i];
4961       ncols_d = ai[i+1] - ai[i];
4962       /* off-diagonal portion of A */
4963       for (jo=0; jo<ncols_o; jo++) {
4964         col = cmap[*bj];
4965         if (col >= cstart) break;
4966         cj[k]   = col; bj++;
4967         ca[k++] = *ba++;
4968       }
4969       /* diagonal portion of A */
4970       for (j=0; j<ncols_d; j++) {
4971         cj[k]   = cstart + *aj++;
4972         ca[k++] = *aa++;
4973       }
4974       /* off-diagonal portion of A */
4975       for (j=jo; j<ncols_o; j++) {
4976         cj[k]   = cmap[*bj++];
4977         ca[k++] = *ba++;
4978       }
4979     }
4980     /* put together the new matrix */
4981     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4982     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4983     /* Since these are PETSc arrays, change flags to free them as necessary. */
4984     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4985     mat->free_a  = PETSC_TRUE;
4986     mat->free_ij = PETSC_TRUE;
4987     mat->nonew   = 0;
4988   } else if (scall == MAT_REUSE_MATRIX){
4989     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4990     ci = mat->i; cj = mat->j; cam = mat->a;
4991     for (i=0; i<am; i++) {
4992       /* off-diagonal portion of A */
4993       ncols_o = bi[i+1] - bi[i];
4994       for (jo=0; jo<ncols_o; jo++) {
4995         col = cmap[*bj];
4996         if (col >= cstart) break;
4997         *cam++ = *ba++; bj++;
4998       }
4999       /* diagonal portion of A */
5000       ncols_d = ai[i+1] - ai[i];
5001       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5002       /* off-diagonal portion of A */
5003       for (j=jo; j<ncols_o; j++) {
5004         *cam++ = *ba++; bj++;
5005       }
5006     }
5007   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5008   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5009   PetscFunctionReturn(0);
5010 }
5011 
5012 #undef __FUNCT__
5013 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5014 /*@C
5015      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5016 
5017     Not Collective
5018 
5019    Input Parameters:
5020 +    A - the matrix
5021 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5022 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5023 
5024    Output Parameter:
5025 .    A_loc - the local sequential matrix generated
5026 
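   Notes:
     For illustration only, extracting the local rows and all nonzero local columns (passing
     PETSC_NULL for both index sets) might look like
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
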
5027     Level: developer
5028 
5029 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5030 
5031 @*/
5032 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5033 {
5034   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5035   PetscErrorCode    ierr;
5036   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5037   IS                isrowa,iscola;
5038   Mat               *aloc;
5039   PetscBool       match;
5040 
5041   PetscFunctionBegin;
5042   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5043   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5044   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5045   if (!row){
5046     start = A->rmap->rstart; end = A->rmap->rend;
5047     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5048   } else {
5049     isrowa = *row;
5050   }
5051   if (!col){
5052     start = A->cmap->rstart;
5053     cmap  = a->garray;
5054     nzA   = a->A->cmap->n;
5055     nzB   = a->B->cmap->n;
5056     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5057     ncols = 0;
5058     for (i=0; i<nzB; i++) {
5059       if (cmap[i] < start) idx[ncols++] = cmap[i];
5060       else break;
5061     }
5062     imark = i;
5063     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5064     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5065     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5066   } else {
5067     iscola = *col;
5068   }
5069   if (scall != MAT_INITIAL_MATRIX){
5070     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5071     aloc[0] = *A_loc;
5072   }
5073   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5074   *A_loc = aloc[0];
5075   ierr = PetscFree(aloc);CHKERRQ(ierr);
5076   if (!row){
5077     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5078   }
5079   if (!col){
5080     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5081   }
5082   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5083   PetscFunctionReturn(0);
5084 }
5085 
5086 #undef __FUNCT__
5087 #define __FUNCT__ "MatGetBrowsOfAcols"
5088 /*@C
5089     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5090 
5091     Collective on Mat
5092 
5093    Input Parameters:
5094 +    A,B - the matrices in mpiaij format
5095 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5096 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5097 
5098    Output Parameter:
5099 +    rowb, colb - index sets of rows and columns of B to extract
5100 -    B_seq - the sequential matrix generated
5101 
5102     Level: developer
5103 
5104 @*/
5105 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5106 {
5107   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5108   PetscErrorCode    ierr;
5109   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5110   IS                isrowb,iscolb;
5111   Mat               *bseq=PETSC_NULL;
5112 
5113   PetscFunctionBegin;
5114   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5115     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5116   }
5117   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5118 
5119   if (scall == MAT_INITIAL_MATRIX){
5120     start = A->cmap->rstart;
5121     cmap  = a->garray;
5122     nzA   = a->A->cmap->n;
5123     nzB   = a->B->cmap->n;
5124     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5125     ncols = 0;
5126     for (i=0; i<nzB; i++) {  /* row < local row index */
5127       if (cmap[i] < start) idx[ncols++] = cmap[i];
5128       else break;
5129     }
5130     imark = i;
5131     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5132     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5133     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5134     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5135   } else {
5136     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5137     isrowb = *rowb; iscolb = *colb;
5138     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5139     bseq[0] = *B_seq;
5140   }
5141   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5142   *B_seq = bseq[0];
5143   ierr = PetscFree(bseq);CHKERRQ(ierr);
5144   if (!rowb){
5145     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5146   } else {
5147     *rowb = isrowb;
5148   }
5149   if (!colb){
5150     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5151   } else {
5152     *colb = iscolb;
5153   }
5154   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5155   PetscFunctionReturn(0);
5156 }
5157 
5158 #undef __FUNCT__
5159 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5160 /*
5161     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5162     of the OFF-DIAGONAL portion of local A
5163 
5164     Collective on Mat
5165 
5166    Input Parameters:
5167 +    A,B - the matrices in mpiaij format
5168 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5169 
5170    Output Parameter:
5171 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5172 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5173 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5174 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5175 
5176     Level: developer
5177 
5178 */
5179 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5180 {
5181   VecScatter_MPI_General *gen_to,*gen_from;
5182   PetscErrorCode         ierr;
5183   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5184   Mat_SeqAIJ             *b_oth;
5185   VecScatter             ctx=a->Mvctx;
5186   MPI_Comm               comm=((PetscObject)ctx)->comm;
5187   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5188   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5189   PetscScalar            *rvalues,*svalues;
5190   MatScalar              *b_otha,*bufa,*bufA;
5191   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5192   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5193   MPI_Status             *sstatus,rstatus;
5194   PetscMPIInt            jj;
5195   PetscInt               *cols,sbs,rbs;
5196   PetscScalar            *vals;
5197 
5198   PetscFunctionBegin;
5199   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5200     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5201   }
5202   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5203   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5204 
5205   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5206   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5207   rvalues  = gen_from->values; /* holds the length of receiving row */
5208   svalues  = gen_to->values;   /* holds the length of sending row */
5209   nrecvs   = gen_from->n;
5210   nsends   = gen_to->n;
5211 
5212   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5213   srow     = gen_to->indices;   /* local row index to be sent */
5214   sstarts  = gen_to->starts;
5215   sprocs   = gen_to->procs;
5216   sstatus  = gen_to->sstatus;
5217   sbs      = gen_to->bs;
5218   rstarts  = gen_from->starts;
5219   rprocs   = gen_from->procs;
5220   rbs      = gen_from->bs;
5221 
5222   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5223   if (scall == MAT_INITIAL_MATRIX){
5224     /* i-array */
5225     /*---------*/
5226     /*  post receives */
5227     for (i=0; i<nrecvs; i++){
5228       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5229       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5230       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5231     }
5232 
5233     /* pack the outgoing message */
5234     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5235     sstartsj[0] = 0;  rstartsj[0] = 0;
5236     len = 0; /* total length of j or a array to be sent */
5237     k = 0;
5238     for (i=0; i<nsends; i++){
5239       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5240       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5241       for (j=0; j<nrows; j++) {
5242         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5243         for (l=0; l<sbs; l++){
5244           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5245           rowlen[j*sbs+l] = ncols;
5246           len += ncols;
5247           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5248         }
5249         k++;
5250       }
5251       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5252       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5253     }
5254     /* recvs and sends of i-array are completed */
5255     i = nrecvs;
5256     while (i--) {
5257       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5258     }
5259     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5260 
5261     /* allocate buffers for sending j and a arrays */
5262     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5263     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5264 
5265     /* create i-array of B_oth */
5266     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5267     b_othi[0] = 0;
5268     len = 0; /* total length of j or a array to be received */
5269     k = 0;
5270     for (i=0; i<nrecvs; i++){
5271       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5272       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5273       for (j=0; j<nrows; j++) {
5274         b_othi[k+1] = b_othi[k] + rowlen[j];
5275         len += rowlen[j]; k++;
5276       }
5277       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5278     }
5279 
5280     /* allocate space for the j and a arrays of B_oth */
5281     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5282     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5283 
5284     /* j-array */
5285     /*---------*/
5286     /*  post receives of j-array */
5287     for (i=0; i<nrecvs; i++){
5288       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5289       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5290     }
5291 
5292     /* pack the outgoing message j-array */
5293     k = 0;
5294     for (i=0; i<nsends; i++){
5295       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5296       bufJ = bufj+sstartsj[i];
5297       for (j=0; j<nrows; j++) {
5298         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5299         for (ll=0; ll<sbs; ll++){
5300           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5301           for (l=0; l<ncols; l++){
5302             *bufJ++ = cols[l];
5303           }
5304           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5305         }
5306       }
5307       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5308     }
5309 
5310     /* recvs and sends of j-array are completed */
5311     i = nrecvs;
5312     while (i--) {
5313       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5314     }
5315     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5316   } else if (scall == MAT_REUSE_MATRIX){
5317     sstartsj = *startsj_s;
5318     rstartsj = *startsj_r;
5319     bufa     = *bufa_ptr;
5320     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5321     b_otha   = b_oth->a;
5322   } else {
5323     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5324   }
5325 
5326   /* a-array */
5327   /*---------*/
5328   /*  post receives of a-array */
5329   for (i=0; i<nrecvs; i++){
5330     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5331     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5332   }
5333 
5334   /* pack the outgoing message a-array */
5335   k = 0;
5336   for (i=0; i<nsends; i++){
5337     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5338     bufA = bufa+sstartsj[i];
5339     for (j=0; j<nrows; j++) {
5340       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5341       for (ll=0; ll<sbs; ll++){
5342         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5343         for (l=0; l<ncols; l++){
5344           *bufA++ = vals[l];
5345         }
5346         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5347       }
5348     }
5349     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5350   }
5351   /* recvs and sends of a-array are completed */
5352   i = nrecvs;
5353   while (i--) {
5354     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355   }
5356   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5358 
5359   if (scall == MAT_INITIAL_MATRIX){
5360     /* put together the new matrix */
5361     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5362 
5363     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5364     /* Since these are PETSc arrays, change flags to free them as necessary. */
5365     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5366     b_oth->free_a  = PETSC_TRUE;
5367     b_oth->free_ij = PETSC_TRUE;
5368     b_oth->nonew   = 0;
5369 
5370     ierr = PetscFree(bufj);CHKERRQ(ierr);
5371     if (!startsj_s || !bufa_ptr){
5372       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5373       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so free it here */
5374     } else {
5375       *startsj_s = sstartsj;
5376       *startsj_r = rstartsj;
5377       *bufa_ptr  = bufa;
5378     }
5379   }
5380   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5381   PetscFunctionReturn(0);
5382 }
5383 
5384 #undef __FUNCT__
5385 #define __FUNCT__ "MatGetCommunicationStructs"
5386 /*@C
5387   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5388 
5389   Not Collective
5390 
5391   Input Parameter:
5392 . A - The matrix in mpiaij format
5393 
5394   Output Parameters:
5395 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5396 . colmap - A map from global column index to local index into lvec
5397 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5398 
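  Example usage (a minimal sketch; A is assumed to be an assembled matrix of type MATMPIAIJ, and the
  declarations of A and ierr are omitted):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     /* lvec, colmap, and Mvctx reference structures owned by A; do not destroy or free them */
.ve
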
5399   Level: developer
5400 
5401 @*/
5402 #if defined (PETSC_USE_CTABLE)
5403 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5404 #else
5405 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5406 #endif
5407 {
5408   Mat_MPIAIJ *a;
5409 
5410   PetscFunctionBegin;
5411   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5412   PetscValidPointer(lvec, 2);
5413   PetscValidPointer(colmap, 3);
5414   PetscValidPointer(multScatter, 4);
5415   a = (Mat_MPIAIJ *) A->data;
5416   if (lvec) *lvec = a->lvec;
5417   if (colmap) *colmap = a->colmap;
5418   if (multScatter) *multScatter = a->Mvctx;
5419   PetscFunctionReturn(0);
5420 }
5421 
5422 EXTERN_C_BEGIN
5423 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5424 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5425 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5426 EXTERN_C_END
5427 
5428 #undef __FUNCT__
5429 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5430 /*
5431     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5432 
5433                n                       p                          p
5434         (              )       (              )         (                  )
5435       m (      A       )  *  n (       B      )   =   m (         C        )
5436         (              )       (              )         (                  )
5437 
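        That is, C = A*B is formed through the transpose identity C = (B^T * A^T)^T, so the
        available MPIAIJ*MPIDense product can be used for the intermediate B^T * A^T.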
5438 */
5439 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5440 {
5441   PetscErrorCode     ierr;
5442   Mat                At,Bt,Ct;
5443 
5444   PetscFunctionBegin;
5445   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5446   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5447   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5448   ierr = MatDestroy(&At);CHKERRQ(ierr);
5449   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5450   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5451   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5452   PetscFunctionReturn(0);
5453 }
5454 
5455 #undef __FUNCT__
5456 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5457 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5458 {
5459   PetscErrorCode ierr;
5460   PetscInt       m=A->rmap->n,n=B->cmap->n;
5461   Mat            Cmat;
5462 
5463   PetscFunctionBegin;
5464   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5465   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5466   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5467   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5468   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5469   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5470   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5471   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5472   *C   = Cmat;
5473   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5474   PetscFunctionReturn(0);
5475 }
5476 
5477 /* ----------------------------------------------------------------*/
5478 #undef __FUNCT__
5479 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5480 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5481 {
5482   PetscErrorCode ierr;
5483 
5484   PetscFunctionBegin;
5485   if (scall == MAT_INITIAL_MATRIX){
5486     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5487   }
5488   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 EXTERN_C_BEGIN
5493 #if defined(PETSC_HAVE_MUMPS)
5494 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5495 #endif
5496 #if defined(PETSC_HAVE_PASTIX)
5497 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5498 #endif
5499 #if defined(PETSC_HAVE_SUPERLU_DIST)
5500 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5501 #endif
5502 #if defined(PETSC_HAVE_SPOOLES)
5503 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5504 #endif
5505 EXTERN_C_END
5506 
5507 /*MC
5508    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5509 
5510    Options Database Keys:
5511 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5512 
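  Example usage (a minimal sketch; M, N and the preallocation counts are illustrative, and the
  declaration of ierr is omitted):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
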
5513   Level: beginner
5514 
5515 .seealso: MatCreateAIJ()
5516 M*/
5517 
5518 EXTERN_C_BEGIN
5519 #undef __FUNCT__
5520 #define __FUNCT__ "MatCreate_MPIAIJ"
5521 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5522 {
5523   Mat_MPIAIJ     *b;
5524   PetscErrorCode ierr;
5525   PetscMPIInt    size;
5526 
5527   PetscFunctionBegin;
5528   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5529   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5530   B->data         = (void*)b;
5531   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5532   B->assembled    = PETSC_FALSE;
5533   B->insertmode   = NOT_SET_VALUES;
5534   b->size         = size;
5535   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5536 
5537   /* build cache for off array entries formed */
5538   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5539   b->donotstash  = PETSC_FALSE;
5540   b->colmap      = 0;
5541   b->garray      = 0;
5542   b->roworiented = PETSC_TRUE;
5543 
5544   /* stuff used for matrix vector multiply */
5545   b->lvec      = PETSC_NULL;
5546   b->Mvctx     = PETSC_NULL;
5547 
5548   /* stuff for MatGetRow() */
5549   b->rowindices   = 0;
5550   b->rowvalues    = 0;
5551   b->getrowactive = PETSC_FALSE;
5552 
5553   /* flexible pointer used in CUSP/CUSPARSE classes */
5554   b->spptr        = PETSC_NULL;
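  /* Compose (register) the type-specific implementations below so that generic interfaces such as
     MatMPIAIJSetPreallocation(), MatConvert(), and MatGetFactor() can locate them at runtime via
     PetscObjectQueryFunction() */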
5555 #if defined(PETSC_HAVE_SPOOLES)
5556   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5557                                      "MatGetFactor_mpiaij_spooles",
5558                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5559 #endif
5560 #if defined(PETSC_HAVE_MUMPS)
5561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5562                                      "MatGetFactor_aij_mumps",
5563                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5564 #endif
5565 #if defined(PETSC_HAVE_PASTIX)
5566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5567 					   "MatGetFactor_mpiaij_pastix",
5568 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5569 #endif
5570 #if defined(PETSC_HAVE_SUPERLU_DIST)
5571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5572                                      "MatGetFactor_mpiaij_superlu_dist",
5573                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5574 #endif
5575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5576                                      "MatStoreValues_MPIAIJ",
5577                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5578   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5579                                      "MatRetrieveValues_MPIAIJ",
5580                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5582 				     "MatGetDiagonalBlock_MPIAIJ",
5583                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5585 				     "MatIsTranspose_MPIAIJ",
5586 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5588 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5589 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5591 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5592 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5594 				     "MatDiagonalScaleLocal_MPIAIJ",
5595 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5597                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5598                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5600                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5601                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5603                                      "MatConvert_MPIAIJ_MPISBAIJ",
5604                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5606                                      "MatMatMult_MPIDense_MPIAIJ",
5607                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5609                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5610                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5612                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5613                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
5617 EXTERN_C_END
5618 
5619 #undef __FUNCT__
5620 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5621 /*@
5622      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5623          and "off-diagonal" part of the matrix in CSR format.
5624 
5625    Collective on MPI_Comm
5626 
5627    Input Parameters:
5628 +  comm - MPI communicator
5629 .  m - number of local rows (Cannot be PETSC_DECIDE)
5630 .  n - number of local columns. This value should be the same as the local size used in creating the
5631        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5632        calculated if N is given). For square matrices n is almost always m.
5633 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5634 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5635 .   i - row indices for "diagonal" portion of matrix
5636 .   j - column indices
5637 .   a - matrix values
5638 .   oi - row indices for "off-diagonal" portion of matrix
5639 .   oj - column indices
5640 -   oa - matrix values
5641 
5642    Output Parameter:
5643 .   mat - the matrix
5644 
5645    Level: advanced
5646 
5647    Notes:
5648        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5649        must free the arrays once the matrix has been destroyed and not before.
5650
5651        The i, j, oi, and oj indices are 0-based
5652 
5653        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5654 
5655        This sets local rows and cannot be used to set off-processor values.
5656 
5657        You cannot later use MatSetValues() to change values in this matrix.
5658 
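   Example usage (a schematic sketch; di, dj, da and oi, oj, oa are assumed to already hold the local
   "diagonal" and "off-diagonal" blocks in 0-based CSR format on each process, and the declaration of
   ierr is omitted):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     /* ... use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     /* only now may di, dj, da, oi, oj, and oa be freed by the caller */
.ve
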
5659 .keywords: matrix, aij, compressed row, sparse, parallel
5660 
5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5662           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5663 @*/
5664 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5665 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5666 {
5667   PetscErrorCode ierr;
5668   Mat_MPIAIJ     *maij;
5669 
5670   PetscFunctionBegin;
5671   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5672   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5673   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5674   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5675   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5676   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5677   maij = (Mat_MPIAIJ*) (*mat)->data;
5678   maij->donotstash     = PETSC_TRUE;
5679   (*mat)->preallocated = PETSC_TRUE;
5680 
5681   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5682   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5683 
5684   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5685   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5686 
5687   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691 
5692   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   PetscFunctionReturn(0);
5695 }
5696 
5697 /*
5698     Special version of MatSetValues() for MPIAIJ matrices that Fortran code can call directly, bypassing the generic MatSetValues() dispatch
5699 */
5700 #include <petsc-private/fortranimpl.h>
5701 
5702 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5706 #endif
5707 
5708 /* Redefine these macros so they can be used in a void function */
5709 #undef CHKERRQ
5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5711 #undef SETERRQ2
5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ3
5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5715 #undef SETERRQ
5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5717 
5718 EXTERN_C_BEGIN
5719 #undef __FUNCT__
5720 #define __FUNCT__ "matsetvaluesmpiaij_"
5721 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5722 {
5723   Mat             mat = *mmat;
5724   PetscInt        m = *mm, n = *mn;
5725   InsertMode      addv = *maddv;
5726   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5727   PetscScalar     value;
5728   PetscErrorCode  ierr;
5729 
5730   MatCheckPreallocated(mat,1);
5731   if (mat->insertmode == NOT_SET_VALUES) {
5732     mat->insertmode = addv;
5733   }
5734 #if defined(PETSC_USE_DEBUG)
5735   else if (mat->insertmode != addv) {
5736     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5737   }
5738 #endif
5739   {
5740   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5741   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5742   PetscBool       roworiented = aij->roworiented;
5743 
5744   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5745   Mat             A = aij->A;
5746   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5747   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5748   MatScalar       *aa = a->a;
5749   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5750   Mat             B = aij->B;
5751   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5752   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5753   MatScalar       *ba = b->a;
5754 
5755   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5756   PetscInt        nonew = a->nonew;
5757   MatScalar       *ap1,*ap2;
5758 
5759   PetscFunctionBegin;
5760   for (i=0; i<m; i++) {
5761     if (im[i] < 0) continue;
5762 #if defined(PETSC_USE_DEBUG)
5763     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5764 #endif
5765     if (im[i] >= rstart && im[i] < rend) {
5766       row      = im[i] - rstart;
5767       lastcol1 = -1;
5768       rp1      = aj + ai[row];
5769       ap1      = aa + ai[row];
5770       rmax1    = aimax[row];
5771       nrow1    = ailen[row];
5772       low1     = 0;
5773       high1    = nrow1;
5774       lastcol2 = -1;
5775       rp2      = bj + bi[row];
5776       ap2      = ba + bi[row];
5777       rmax2    = bimax[row];
5778       nrow2    = bilen[row];
5779       low2     = 0;
5780       high2    = nrow2;
5781 
5782       for (j=0; j<n; j++) {
5783         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5784         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5785         if (in[j] >= cstart && in[j] < cend){
5786           col = in[j] - cstart;
5787           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5788         } else if (in[j] < 0) continue;
5789 #if defined(PETSC_USE_DEBUG)
5790         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5791 #endif
5792         else {
5793           if (mat->was_assembled) {
5794             if (!aij->colmap) {
5795               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5796             }
5797 #if defined (PETSC_USE_CTABLE)
5798             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5799 	    col--;
5800 #else
5801             col = aij->colmap[in[j]] - 1;
5802 #endif
5803             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5804               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5805               col =  in[j];
5806               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5807               B = aij->B;
5808               b = (Mat_SeqAIJ*)B->data;
5809               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5810               rp2      = bj + bi[row];
5811               ap2      = ba + bi[row];
5812               rmax2    = bimax[row];
5813               nrow2    = bilen[row];
5814               low2     = 0;
5815               high2    = nrow2;
5816               bm       = aij->B->rmap->n;
5817               ba       = b->a;
5818             }
5819           } else col = in[j];
5820           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5821         }
5822       }
5823     } else {
5824       if (!aij->donotstash) {
5825         if (roworiented) {
5826           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5827         } else {
5828           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5829         }
5830       }
5831     }
5832   }}
5833   PetscFunctionReturnVoid();
5834 }
5835 EXTERN_C_END
5836 
5837