xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e52d2c62b911f45b6dbda30e9b6af029f19046ae)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
175 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
176 {
177   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
178   IS              sis,gis;
179   PetscErrorCode  ierr;
180   const PetscInt  *isis,*igis;
181   PetscInt        n,*iis,nsis,ngis,rstart,i;
182 
183   PetscFunctionBegin;
184   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
185   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
186   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
187   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
188   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
189   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
190 
191   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
192   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
193   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
194   n    = ngis + nsis;
195   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
196   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
197   for (i=0; i<n; i++) iis[i] += rstart;
198   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
199 
200   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
201   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
202   ierr = ISDestroy(&sis);CHKERRQ(ierr);
203   ierr = ISDestroy(&gis);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatDistribute_MPIAIJ"
209 /*
210     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
211     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
212 
213     Only for square matrices
214 
215     Used by a preconditioner, hence PETSC_EXTERN
216 */
217 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
218 {
219   PetscMPIInt    rank,size;
220   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
221   PetscErrorCode ierr;
222   Mat            mat;
223   Mat_SeqAIJ     *gmata;
224   PetscMPIInt    tag;
225   MPI_Status     status;
226   PetscBool      aij;
227   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
228 
229   PetscFunctionBegin;
230   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
231   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
232   if (!rank) {
233     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
234     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
235   }
236   if (reuse == MAT_INITIAL_MATRIX) {
237     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
238     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
239     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
240     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
241     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
242     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
243     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
244     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
245     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
246 
247     rowners[0] = 0;
248     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
249     rstart = rowners[rank];
250     rend   = rowners[rank+1];
251     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
252     if (!rank) {
253       gmata = (Mat_SeqAIJ*) gmat->data;
254       /* send row lengths to all processors */
255       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
256       for (i=1; i<size; i++) {
257         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259       /* determine number diagonal and off-diagonal counts */
260       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
261       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
262       jj   = 0;
263       for (i=0; i<m; i++) {
264         for (j=0; j<dlens[i]; j++) {
265           if (gmata->j[jj] < rstart) ld[i]++;
266           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
267           jj++;
268         }
269       }
270       /* send column indices to other processes */
271       for (i=1; i<size; i++) {
272         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
273         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
274         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
275       }
276 
277       /* send numerical values to other processes */
278       for (i=1; i<size; i++) {
279         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
280         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
281       }
282       gmataa = gmata->a;
283       gmataj = gmata->j;
284 
285     } else {
286       /* receive row lengths */
287       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
288       /* receive column indices */
289       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
290       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
291       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
292       /* determine number diagonal and off-diagonal counts */
293       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
294       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
295       jj   = 0;
296       for (i=0; i<m; i++) {
297         for (j=0; j<dlens[i]; j++) {
298           if (gmataj[jj] < rstart) ld[i]++;
299           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
300           jj++;
301         }
302       }
303       /* receive numerical values */
304       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
305       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
306     }
307     /* set preallocation */
308     for (i=0; i<m; i++) {
309       dlens[i] -= olens[i];
310     }
311     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
312     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
313 
314     for (i=0; i<m; i++) {
315       dlens[i] += olens[i];
316     }
317     cnt = 0;
318     for (i=0; i<m; i++) {
319       row  = rstart + i;
320       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
321       cnt += dlens[i];
322     }
323     if (rank) {
324       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
325     }
326     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
327     ierr = PetscFree(rowners);CHKERRQ(ierr);
328 
329     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
330 
331     *inmat = mat;
332   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
333     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
334     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
335     mat  = *inmat;
336     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
337     if (!rank) {
338       /* send numerical values to other processes */
339       gmata  = (Mat_SeqAIJ*) gmat->data;
340       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
341       gmataa = gmata->a;
342       for (i=1; i<size; i++) {
343         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
344         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
345       }
346       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
347     } else {
348       /* receive numerical values from process 0*/
349       nz   = Ad->nz + Ao->nz;
350       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
351       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
352     }
353     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
354     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
355     ad = Ad->a;
356     ao = Ao->a;
357     if (mat->rmap->n) {
358       i  = 0;
359       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
360       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
361     }
362     for (i=1; i<mat->rmap->n; i++) {
363       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
364       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
365     }
366     i--;
367     if (mat->rmap->n) {
368       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
369     }
370     if (rank) {
371       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
372     }
373   }
374   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
376   PetscFunctionReturn(0);
377 }
378 
379 /*
380   Local utility routine that creates a mapping from the global column
381 number to the local number in the off-diagonal part of the local
382 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
383 a slightly higher hash table cost; without it it is not scalable (each processor
384 has an order N integer array but is fast to acess.
385 */
386 #undef __FUNCT__
387 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
388 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
389 {
390   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
391   PetscErrorCode ierr;
392   PetscInt       n = aij->B->cmap->n,i;
393 
394   PetscFunctionBegin;
395   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
396 #if defined(PETSC_USE_CTABLE)
397   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
398   for (i=0; i<n; i++) {
399     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
400   }
401 #else
402   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
403   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
404   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
405 #endif
406   PetscFunctionReturn(0);
407 }
408 
409 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
410 { \
411     if (col <= lastcol1)  low1 = 0;     \
412     else                 high1 = nrow1; \
413     lastcol1 = col;\
414     while (high1-low1 > 5) { \
415       t = (low1+high1)/2; \
416       if (rp1[t] > col) high1 = t; \
417       else              low1  = t; \
418     } \
419       for (_i=low1; _i<high1; _i++) { \
420         if (rp1[_i] > col) break; \
421         if (rp1[_i] == col) { \
422           if (addv == ADD_VALUES) ap1[_i] += value;   \
423           else                    ap1[_i] = value; \
424           goto a_noinsert; \
425         } \
426       }  \
427       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
428       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
429       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
430       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
431       N = nrow1++ - 1; a->nz++; high1++; \
432       /* shift up all the later entries in this row */ \
433       for (ii=N; ii>=_i; ii--) { \
434         rp1[ii+1] = rp1[ii]; \
435         ap1[ii+1] = ap1[ii]; \
436       } \
437       rp1[_i] = col;  \
438       ap1[_i] = value;  \
439       A->nonzerostate++;\
440       a_noinsert: ; \
441       ailen[row] = nrow1; \
442 }
443 
444 
445 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
446   { \
447     if (col <= lastcol2) low2 = 0;                        \
448     else high2 = nrow2;                                   \
449     lastcol2 = col;                                       \
450     while (high2-low2 > 5) {                              \
451       t = (low2+high2)/2;                                 \
452       if (rp2[t] > col) high2 = t;                        \
453       else             low2  = t;                         \
454     }                                                     \
455     for (_i=low2; _i<high2; _i++) {                       \
456       if (rp2[_i] > col) break;                           \
457       if (rp2[_i] == col) {                               \
458         if (addv == ADD_VALUES) ap2[_i] += value;         \
459         else                    ap2[_i] = value;          \
460         goto b_noinsert;                                  \
461       }                                                   \
462     }                                                     \
463     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
464     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
465     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
466     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
467     N = nrow2++ - 1; b->nz++; high2++;                    \
468     /* shift up all the later entries in this row */      \
469     for (ii=N; ii>=_i; ii--) {                            \
470       rp2[ii+1] = rp2[ii];                                \
471       ap2[ii+1] = ap2[ii];                                \
472     }                                                     \
473     rp2[_i] = col;                                        \
474     ap2[_i] = value;                                      \
475     B->nonzerostate++;                                    \
476     b_noinsert: ;                                         \
477     bilen[row] = nrow2;                                   \
478   }
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
482 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
483 {
484   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
485   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
486   PetscErrorCode ierr;
487   PetscInt       l,*garray = mat->garray,diag;
488 
489   PetscFunctionBegin;
490   /* code only works for square matrices A */
491 
492   /* find size of row to the left of the diagonal part */
493   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
494   row  = row - diag;
495   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
496     if (garray[b->j[b->i[row]+l]] > diag) break;
497   }
498   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
499 
500   /* diagonal part */
501   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
502 
503   /* right of diagonal part */
504   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatSetValues_MPIAIJ"
510 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
511 {
512   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
513   PetscScalar    value;
514   PetscErrorCode ierr;
515   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
516   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
517   PetscBool      roworiented = aij->roworiented;
518 
519   /* Some Variables required in the macro */
520   Mat        A                 = aij->A;
521   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
522   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
523   MatScalar  *aa               = a->a;
524   PetscBool  ignorezeroentries = a->ignorezeroentries;
525   Mat        B                 = aij->B;
526   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
527   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
528   MatScalar  *ba               = b->a;
529 
530   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
531   PetscInt  nonew;
532   MatScalar *ap1,*ap2;
533 
534   PetscFunctionBegin;
535   for (i=0; i<m; i++) {
536     if (im[i] < 0) continue;
537 #if defined(PETSC_USE_DEBUG)
538     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
539 #endif
540     if (im[i] >= rstart && im[i] < rend) {
541       row      = im[i] - rstart;
542       lastcol1 = -1;
543       rp1      = aj + ai[row];
544       ap1      = aa + ai[row];
545       rmax1    = aimax[row];
546       nrow1    = ailen[row];
547       low1     = 0;
548       high1    = nrow1;
549       lastcol2 = -1;
550       rp2      = bj + bi[row];
551       ap2      = ba + bi[row];
552       rmax2    = bimax[row];
553       nrow2    = bilen[row];
554       low2     = 0;
555       high2    = nrow2;
556 
557       for (j=0; j<n; j++) {
558         if (roworiented) value = v[i*n+j];
559         else             value = v[i+j*m];
560         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
561         if (in[j] >= cstart && in[j] < cend) {
562           col   = in[j] - cstart;
563           nonew = a->nonew;
564           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
565         } else if (in[j] < 0) continue;
566 #if defined(PETSC_USE_DEBUG)
567         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
568 #endif
569         else {
570           if (mat->was_assembled) {
571             if (!aij->colmap) {
572               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
573             }
574 #if defined(PETSC_USE_CTABLE)
575             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
576             col--;
577 #else
578             col = aij->colmap[in[j]] - 1;
579 #endif
580             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
581               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
582               col  =  in[j];
583               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
584               B     = aij->B;
585               b     = (Mat_SeqAIJ*)B->data;
586               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
587               rp2   = bj + bi[row];
588               ap2   = ba + bi[row];
589               rmax2 = bimax[row];
590               nrow2 = bilen[row];
591               low2  = 0;
592               high2 = nrow2;
593               bm    = aij->B->rmap->n;
594               ba    = b->a;
595             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
599         }
600       }
601     } else {
602       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetValues_MPIAIJ"
618 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
619 {
620   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
621   PetscErrorCode ierr;
622   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
623   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
624 
625   PetscFunctionBegin;
626   for (i=0; i<m; i++) {
627     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
628     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
629     if (idxm[i] >= rstart && idxm[i] < rend) {
630       row = idxm[i] - rstart;
631       for (j=0; j<n; j++) {
632         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
633         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
634         if (idxn[j] >= cstart && idxn[j] < cend) {
635           col  = idxn[j] - cstart;
636           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
637         } else {
638           if (!aij->colmap) {
639             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
640           }
641 #if defined(PETSC_USE_CTABLE)
642           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
643           col--;
644 #else
645           col = aij->colmap[idxn[j]] - 1;
646 #endif
647           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
648           else {
649             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
650           }
651         }
652       }
653     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
663 {
664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
665   PetscErrorCode ierr;
666   PetscInt       nstash,reallocs;
667   InsertMode     addv;
668 
669   PetscFunctionBegin;
670   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
671 
672   /* make sure all processors are either in INSERTMODE or ADDMODE */
673   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
674   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
675   mat->insertmode = addv; /* in case this processor had no cache */
676 
677   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
678   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
679   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695   InsertMode     addv = mat->insertmode;
696 
697   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
698 
699   PetscFunctionBegin;
700   if (!aij->donotstash && !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n; ) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
714 
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719   }
720   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
721   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
722 
723   /* determine if any processor has disassembled, if so we must
724      also disassemble ourselfs, in order that we may reassemble. */
725   /*
726      if nonzero structure of submatrix B cannot change then we know that
727      no processor disassembled thus we can skip this stuff
728   */
729   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
730     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
731     if (mat->was_assembled && !other_disassembled) {
732       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
733     }
734   }
735   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
736     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
737   }
738   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
739   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
741 
742   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
743 
744   aij->rowvalues = 0;
745 
746   /* used by MatAXPY() */
747   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
748   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
749 
750   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
751   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
752 
753   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
754   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
755     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
756     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
770   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatZeroRows_MPIAIJ"
776 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
777 {
778   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
779   PetscInt      *owners = A->rmap->range;
780   PetscInt       n      = A->rmap->n;
781   PetscMPIInt    size   = mat->size;
782   PetscSF        sf;
783   PetscInt      *lrows;
784   PetscSFNode   *rrows;
785   PetscInt       lastidx = -1, r, p = 0, len = 0;
786   PetscErrorCode ierr;
787 
788   PetscFunctionBegin;
789   /* Create SF where leaves are input rows and roots are owned rows */
790   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
791   for (r = 0; r < n; ++r) lrows[r] = -1;
792   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
793   for (r = 0; r < N; ++r) {
794     const PetscInt idx   = rows[r];
795     PetscBool      found = PETSC_FALSE;
796     /* Trick for efficient searching for sorted rows */
797     if (lastidx > idx) p = 0;
798     lastidx = idx;
799     for (; p < size; ++p) {
800       if (idx >= owners[p] && idx < owners[p+1]) {
801         rrows[r].rank  = p;
802         rrows[r].index = rows[r] - owners[p];
803         found = PETSC_TRUE;
804         break;
805       }
806     }
807     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
808   }
809   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
810   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
811   /* Collect flags for rows to be zeroed */
812   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
813   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
814   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
815   /* Compress and put in row numbers */
816   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
817   /* fix right hand side if needed */
818   if (x && b) {
819     const PetscScalar *xx;
820     PetscScalar       *bb;
821 
822     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
823     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
824     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
825     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
826     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
827   }
828   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
829   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
830   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
831     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
832   } else if (diag != 0.0) {
833     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
834     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
835     for (r = 0; r < len; ++r) {
836       const PetscInt row = lrows[r] + A->rmap->rstart;
837       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
838     }
839     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
840     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
841   } else {
842     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   }
844   ierr = PetscFree(lrows);CHKERRQ(ierr);
845 
846   /* only change matrix nonzero state if pattern was allowed to be changed */
847   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
848     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
849     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
856 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
857 {
858   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
859   PetscErrorCode    ierr;
860   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
861   PetscInt          i,j,r,m,p = 0,len = 0;
862   PetscInt          *lrows,*owners = A->rmap->range;
863   PetscSFNode       *rrows;
864   PetscSF           sf;
865   const PetscScalar *xx;
866   PetscScalar       *bb,*mask;
867   Vec               xmask,lmask;
868   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
869   const PetscInt    *aj, *ii,*ridx;
870   PetscScalar       *aa;
871 #if defined(PETSC_DEBUG)
872   PetscBool found = PETSC_FALSE;
873 #endif
874 
875   PetscFunctionBegin;
876   /* Create SF where leaves are input rows and roots are owned rows */
877   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
878   for (r = 0; r < n; ++r) lrows[r] = -1;
879   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
880   for (r = 0; r < N; ++r) {
881     const PetscInt idx   = rows[r];
882     PetscBool      found = PETSC_FALSE;
883     /* Trick for efficient searching for sorted rows */
884     if (lastidx > idx) p = 0;
885     lastidx = idx;
886     for (; p < size; ++p) {
887       if (idx >= owners[p] && idx < owners[p+1]) {
888         rrows[r].rank  = p;
889         rrows[r].index = rows[r] - owners[p];
890         found = PETSC_TRUE;
891         break;
892       }
893     }
894     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
895   }
896   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
897   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
898   /* Collect flags for rows to be zeroed */
899   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
900   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
901   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
902   /* Compress and put in row numbers */
903   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
904   /* zero diagonal part of matrix */
905   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<len; i++) bb[lrows[i]] = 1;
911   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
912   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
915   if (x) {
916     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
919     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
920   }
921   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
922   /* remove zeroed rows of off diagonal matrix */
923   ii = aij->i;
924   for (i=0; i<len; i++) {
925     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
926   }
927   /* loop over all elements of off process part of matrix zeroing removed columns*/
928   if (aij->compressedrow.use) {
929     m    = aij->compressedrow.nrows;
930     ii   = aij->compressedrow.i;
931     ridx = aij->compressedrow.rindex;
932     for (i=0; i<m; i++) {
933       n  = ii[i+1] - ii[i];
934       aj = aij->j + ii[i];
935       aa = aij->a + ii[i];
936 
937       for (j=0; j<n; j++) {
938         if (PetscAbsScalar(mask[*aj])) {
939           if (b) bb[*ridx] -= *aa*xx[*aj];
940           *aa = 0.0;
941         }
942         aa++;
943         aj++;
944       }
945       ridx++;
946     }
947   } else { /* do not use compressed row format */
948     m = l->B->rmap->n;
949     for (i=0; i<m; i++) {
950       n  = ii[i+1] - ii[i];
951       aj = aij->j + ii[i];
952       aa = aij->a + ii[i];
953       for (j=0; j<n; j++) {
954         if (PetscAbsScalar(mask[*aj])) {
955           if (b) bb[i] -= *aa*xx[*aj];
956           *aa = 0.0;
957         }
958         aa++;
959         aj++;
960       }
961     }
962   }
963   if (x) {
964     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
965     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
966   }
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
969   ierr = PetscFree(lrows);CHKERRQ(ierr);
970 
971   /* only change matrix nonzero state if pattern was allowed to be changed */
972   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
973     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
974     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMult_MPIAIJ"
981 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscInt       nt;
986 
987   PetscFunctionBegin;
988   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
989   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
990   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
992   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
999 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1000 {
1001   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1011 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1019   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1026 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1027 {
1028   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1029   PetscErrorCode ierr;
1030   PetscBool      merged;
1031 
1032   PetscFunctionBegin;
1033   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1034   /* do nondiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1036   if (!merged) {
1037     /* send it on its way */
1038     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1039     /* do local part */
1040     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1041     /* receive remote parts: note this assumes the values are not actually */
1042     /* added in yy until the next line, */
1043     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044   } else {
1045     /* do local part */
1046     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1047     /* send it on its way */
1048     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049     /* values actually were received in the Begin() but we need to call this nop */
1050     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1057 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1058 {
1059   MPI_Comm       comm;
1060   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1061   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1062   IS             Me,Notme;
1063   PetscErrorCode ierr;
1064   PetscInt       M,N,first,last,*notme,i;
1065   PetscMPIInt    size;
1066 
1067   PetscFunctionBegin;
1068   /* Easy test: symmetric diagonal block */
1069   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1070   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1071   if (!*f) PetscFunctionReturn(0);
1072   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1073   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1074   if (size == 1) PetscFunctionReturn(0);
1075 
1076   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1077   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1078   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1079   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1080   for (i=0; i<first; i++) notme[i] = i;
1081   for (i=last; i<M; i++) notme[i-last+first] = i;
1082   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1083   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1084   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1085   Aoff = Aoffs[0];
1086   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1087   Boff = Boffs[0];
1088   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1089   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1090   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1091   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1092   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1093   ierr = PetscFree(notme);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1099 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   /* do nondiagonal part */
1106   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1107   /* send it on its way */
1108   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   /* do local part */
1110   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1111   /* receive remote parts */
1112   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 /*
1117   This only works correctly for square matrices where the subblock A->A is the
1118    diagonal block
1119 */
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1122 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1123 {
1124   PetscErrorCode ierr;
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126 
1127   PetscFunctionBegin;
1128   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1129   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1130   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatScale_MPIAIJ"
1136 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1137 {
1138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1143   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatDestroy_MPIAIJ"
1149 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1150 {
1151   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1152   PetscErrorCode ierr;
1153 
1154   PetscFunctionBegin;
1155 #if defined(PETSC_USE_LOG)
1156   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1157 #endif
1158   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1160   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1161   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1162 #if defined(PETSC_USE_CTABLE)
1163   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1164 #else
1165   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1166 #endif
1167   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1168   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1169   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1170   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1171   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1172   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1173 
1174   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1188 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1189 {
1190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1191   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1192   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1193   PetscErrorCode ierr;
1194   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1195   int            fd;
1196   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1197   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1198   PetscScalar    *column_values;
1199   PetscInt       message_count,flowcontrolcount;
1200   FILE           *file;
1201 
1202   PetscFunctionBegin;
1203   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1204   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1205   nz   = A->nz + B->nz;
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     /* get largest number of rows any processor has */
1215     rlen  = mat->rmap->n;
1216     range = mat->rmap->range;
1217     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1218   } else {
1219     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1220     rlen = mat->rmap->n;
1221   }
1222 
1223   /* load up the local row counts */
1224   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1225   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1226 
1227   /* store the row lengths to the file */
1228   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1229   if (!rank) {
1230     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     for (i=1; i<size; i++) {
1232       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1233       rlen = range[i+1] - range[i];
1234       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     }
1237     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1238   } else {
1239     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1240     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1242   }
1243   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1244 
1245   /* load up the local column indices */
1246   nzmax = nz; /* th processor needs space a largest processor needs */
1247   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1249   cnt   = 0;
1250   for (i=0; i<mat->rmap->n; i++) {
1251     for (j=B->i[i]; j<B->i[i+1]; j++) {
1252       if ((col = garray[B->j[j]]) > cstart) break;
1253       column_indices[cnt++] = col;
1254     }
1255     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1256     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1257   }
1258   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1259 
1260   /* store the column indices to the file */
1261   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1262   if (!rank) {
1263     MPI_Status status;
1264     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1265     for (i=1; i<size; i++) {
1266       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1267       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1268       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1269       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     }
1272     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1273   } else {
1274     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1275     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1277     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1280 
1281   /* load up the local column values */
1282   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1283   cnt  = 0;
1284   for (i=0; i<mat->rmap->n; i++) {
1285     for (j=B->i[i]; j<B->i[i+1]; j++) {
1286       if (garray[B->j[j]] > cstart) break;
1287       column_values[cnt++] = B->a[j];
1288     }
1289     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1290     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1291   }
1292   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1293 
1294   /* store the column values to the file */
1295   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1296   if (!rank) {
1297     MPI_Status status;
1298     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1299     for (i=1; i<size; i++) {
1300       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1301       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1302       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1303       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1304       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     }
1306     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1307   } else {
1308     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1309     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1311     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1312   }
1313   ierr = PetscFree(column_values);CHKERRQ(ierr);
1314 
1315   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1316   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #include <petscdraw.h>
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1323 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1324 {
1325   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1326   PetscErrorCode    ierr;
1327   PetscMPIInt       rank = aij->rank,size = aij->size;
1328   PetscBool         isdraw,iascii,isbinary;
1329   PetscViewer       sviewer;
1330   PetscViewerFormat format;
1331 
1332   PetscFunctionBegin;
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1335   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1336   if (iascii) {
1337     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1338     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1339       MatInfo   info;
1340       PetscBool inodes;
1341 
1342       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1343       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1344       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1346       if (!inodes) {
1347         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1348                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1349       } else {
1350         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1351                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1352       }
1353       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1354       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1355       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1357       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1359       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1360       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1363       PetscInt inodecount,inodelimit,*inodes;
1364       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1365       if (inodes) {
1366         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1367       } else {
1368         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1369       }
1370       PetscFunctionReturn(0);
1371     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1372       PetscFunctionReturn(0);
1373     }
1374   } else if (isbinary) {
1375     if (size == 1) {
1376       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1377       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1378     } else {
1379       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1380     }
1381     PetscFunctionReturn(0);
1382   } else if (isdraw) {
1383     PetscDraw draw;
1384     PetscBool isnull;
1385     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1386     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1387   }
1388 
1389   {
1390     /* assemble the entire matrix onto first processor. */
1391     Mat        A;
1392     Mat_SeqAIJ *Aloc;
1393     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1394     MatScalar  *a;
1395 
1396     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1397     if (!rank) {
1398       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1399     } else {
1400       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1401     }
1402     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1403     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1404     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1405     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1406     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1407 
1408     /* copy over the A part */
1409     Aloc = (Mat_SeqAIJ*)aij->A->data;
1410     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1411     row  = mat->rmap->rstart;
1412     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1413     for (i=0; i<m; i++) {
1414       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1415       row++;
1416       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1417     }
1418     aj = Aloc->j;
1419     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1420 
1421     /* copy over the B part */
1422     Aloc = (Mat_SeqAIJ*)aij->B->data;
1423     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1424     row  = mat->rmap->rstart;
1425     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1426     ct   = cols;
1427     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1428     for (i=0; i<m; i++) {
1429       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1430       row++;
1431       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1432     }
1433     ierr = PetscFree(ct);CHKERRQ(ierr);
1434     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     /*
1437        Everyone has to call to draw the matrix since the graphics waits are
1438        synchronized across all processors that share the PetscDraw object
1439     */
1440     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1441     if (!rank) {
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1445     ierr = MatDestroy(&A);CHKERRQ(ierr);
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatView_MPIAIJ"
1452 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1453 {
1454   PetscErrorCode ierr;
1455   PetscBool      iascii,isdraw,issocket,isbinary;
1456 
1457   PetscFunctionBegin;
1458   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1462   if (iascii || isdraw || isbinary || issocket) {
1463     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSOR_MPIAIJ"
1470 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1471 {
1472   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1473   PetscErrorCode ierr;
1474   Vec            bb1 = 0;
1475   PetscBool      hasop;
1476 
1477   PetscFunctionBegin;
1478   if (flag == SOR_APPLY_UPPER) {
1479     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1480     PetscFunctionReturn(0);
1481   }
1482 
1483   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1484     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1485   }
1486 
1487   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492 
1493     while (its--) {
1494       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496 
1497       /* update rhs: bb1 = bb - B*x */
1498       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1499       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1500 
1501       /* local sweep */
1502       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1503     }
1504   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1505     if (flag & SOR_ZERO_INITIAL_GUESS) {
1506       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1507       its--;
1508     }
1509     while (its--) {
1510       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512 
1513       /* update rhs: bb1 = bb - B*x */
1514       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1515       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1516 
1517       /* local sweep */
1518       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1519     }
1520   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1521     if (flag & SOR_ZERO_INITIAL_GUESS) {
1522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1523       its--;
1524     }
1525     while (its--) {
1526       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1527       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528 
1529       /* update rhs: bb1 = bb - B*x */
1530       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1531       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1532 
1533       /* local sweep */
1534       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1535     }
1536   } else if (flag & SOR_EISENSTAT) {
1537     Vec xx1;
1538 
1539     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1540     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1541 
1542     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1543     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     if (!mat->diag) {
1545       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1546       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1547     }
1548     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1549     if (hasop) {
1550       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1551     } else {
1552       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1553     }
1554     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1555 
1556     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1557 
1558     /* local sweep */
1559     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1560     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1561     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1562   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1563 
1564   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatPermute_MPIAIJ"
1570 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1571 {
1572   Mat            aA,aB,Aperm;
1573   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1574   PetscScalar    *aa,*ba;
1575   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1576   PetscSF        rowsf,sf;
1577   IS             parcolp = NULL;
1578   PetscBool      done;
1579   PetscErrorCode ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1583   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1584   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1585   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1586 
1587   /* Invert row permutation to find out where my rows should go */
1588   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1589   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1590   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1591   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1592   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1594 
1595   /* Invert column permutation to find out where my columns should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1599   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1600   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1603 
1604   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1605   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1606   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1607 
1608   /* Find out where my gcols should go */
1609   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1610   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1611   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1612   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1613   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1614   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1616   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1617 
1618   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1619   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1620   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1621   for (i=0; i<m; i++) {
1622     PetscInt row = rdest[i],rowner;
1623     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1624     for (j=ai[i]; j<ai[i+1]; j++) {
1625       PetscInt cowner,col = cdest[aj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630     for (j=bi[i]; j<bi[i+1]; j++) {
1631       PetscInt cowner,col = gcdest[bj[j]];
1632       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1633       if (rowner == cowner) dnnz[i]++;
1634       else onnz[i]++;
1635     }
1636   }
1637   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1638   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1642 
1643   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1644   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1645   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) {
1647     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1648     PetscInt j0,rowlen;
1649     rowlen = ai[i+1] - ai[i];
1650     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1651       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1652       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1653     }
1654     rowlen = bi[i+1] - bi[i];
1655     for (j0=j=0; j<rowlen; j0=j) {
1656       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1657       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1658     }
1659   }
1660   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1663   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1664   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1665   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1666   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1667   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1668   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1669   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1670   *B = Aperm;
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1676 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1677 {
1678   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1679   Mat            A    = mat->A,B = mat->B;
1680   PetscErrorCode ierr;
1681   PetscReal      isend[5],irecv[5];
1682 
1683   PetscFunctionBegin;
1684   info->block_size = 1.0;
1685   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1686 
1687   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1688   isend[3] = info->memory;  isend[4] = info->mallocs;
1689 
1690   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1691 
1692   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1693   isend[3] += info->memory;  isend[4] += info->mallocs;
1694   if (flag == MAT_LOCAL) {
1695     info->nz_used      = isend[0];
1696     info->nz_allocated = isend[1];
1697     info->nz_unneeded  = isend[2];
1698     info->memory       = isend[3];
1699     info->mallocs      = isend[4];
1700   } else if (flag == MAT_GLOBAL_MAX) {
1701     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1702 
1703     info->nz_used      = irecv[0];
1704     info->nz_allocated = irecv[1];
1705     info->nz_unneeded  = irecv[2];
1706     info->memory       = irecv[3];
1707     info->mallocs      = irecv[4];
1708   } else if (flag == MAT_GLOBAL_SUM) {
1709     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1710 
1711     info->nz_used      = irecv[0];
1712     info->nz_allocated = irecv[1];
1713     info->nz_unneeded  = irecv[2];
1714     info->memory       = irecv[3];
1715     info->mallocs      = irecv[4];
1716   }
1717   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1718   info->fill_ratio_needed = 0;
1719   info->factor_mallocs    = 0;
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatSetOption_MPIAIJ"
1725 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1726 {
1727   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   switch (op) {
1732   case MAT_NEW_NONZERO_LOCATIONS:
1733   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1734   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1735   case MAT_KEEP_NONZERO_PATTERN:
1736   case MAT_NEW_NONZERO_LOCATION_ERR:
1737   case MAT_USE_INODES:
1738   case MAT_IGNORE_ZERO_ENTRIES:
1739     MatCheckPreallocated(A,1);
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_ROW_ORIENTED:
1744     a->roworiented = flg;
1745 
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_NEW_DIAGONALS:
1750     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1751     break;
1752   case MAT_IGNORE_OFF_PROC_ENTRIES:
1753     a->donotstash = flg;
1754     break;
1755   case MAT_SPD:
1756     A->spd_set = PETSC_TRUE;
1757     A->spd     = flg;
1758     if (flg) {
1759       A->symmetric                  = PETSC_TRUE;
1760       A->structurally_symmetric     = PETSC_TRUE;
1761       A->symmetric_set              = PETSC_TRUE;
1762       A->structurally_symmetric_set = PETSC_TRUE;
1763     }
1764     break;
1765   case MAT_SYMMETRIC:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_STRUCTURALLY_SYMMETRIC:
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   case MAT_HERMITIAN:
1772     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1773     break;
1774   case MAT_SYMMETRY_ETERNAL:
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   default:
1778     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatGetRow_MPIAIJ"
1785 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1789   PetscErrorCode ierr;
1790   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1791   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1792   PetscInt       *cmap,*idx_p;
1793 
1794   PetscFunctionBegin;
1795   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1796   mat->getrowactive = PETSC_TRUE;
1797 
1798   if (!mat->rowvalues && (idx || v)) {
1799     /*
1800         allocate enough space to hold information from the longest row.
1801     */
1802     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1803     PetscInt   max = 1,tmp;
1804     for (i=0; i<matin->rmap->n; i++) {
1805       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1806       if (max < tmp) max = tmp;
1807     }
1808     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1809   }
1810 
1811   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1812   lrow = row - rstart;
1813 
1814   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1815   if (!v)   {pvA = 0; pvB = 0;}
1816   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1817   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1818   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1819   nztot = nzA + nzB;
1820 
1821   cmap = mat->garray;
1822   if (v  || idx) {
1823     if (nztot) {
1824       /* Sort by increasing column numbers, assuming A and B already sorted */
1825       PetscInt imark = -1;
1826       if (v) {
1827         *v = v_p = mat->rowvalues;
1828         for (i=0; i<nzB; i++) {
1829           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1830           else break;
1831         }
1832         imark = i;
1833         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1834         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1835       }
1836       if (idx) {
1837         *idx = idx_p = mat->rowindices;
1838         if (imark > -1) {
1839           for (i=0; i<imark; i++) {
1840             idx_p[i] = cmap[cworkB[i]];
1841           }
1842         } else {
1843           for (i=0; i<nzB; i++) {
1844             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1845             else break;
1846           }
1847           imark = i;
1848         }
1849         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1850         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1851       }
1852     } else {
1853       if (idx) *idx = 0;
1854       if (v)   *v   = 0;
1855     }
1856   }
1857   *nz  = nztot;
1858   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1859   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1865 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1866 {
1867   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1868 
1869   PetscFunctionBegin;
1870   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1871   aij->getrowactive = PETSC_FALSE;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatNorm_MPIAIJ"
1877 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1878 {
1879   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1880   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1881   PetscErrorCode ierr;
1882   PetscInt       i,j,cstart = mat->cmap->rstart;
1883   PetscReal      sum = 0.0;
1884   MatScalar      *v;
1885 
1886   PetscFunctionBegin;
1887   if (aij->size == 1) {
1888     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1889   } else {
1890     if (type == NORM_FROBENIUS) {
1891       v = amat->a;
1892       for (i=0; i<amat->nz; i++) {
1893         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1894       }
1895       v = bmat->a;
1896       for (i=0; i<bmat->nz; i++) {
1897         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1898       }
1899       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       *norm = PetscSqrtReal(*norm);
1901     } else if (type == NORM_1) { /* max column norm */
1902       PetscReal *tmp,*tmp2;
1903       PetscInt  *jj,*garray = aij->garray;
1904       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1905       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1906       *norm = 0.0;
1907       v     = amat->a; jj = amat->j;
1908       for (j=0; j<amat->nz; j++) {
1909         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1910       }
1911       v = bmat->a; jj = bmat->j;
1912       for (j=0; j<bmat->nz; j++) {
1913         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1914       }
1915       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1916       for (j=0; j<mat->cmap->N; j++) {
1917         if (tmp2[j] > *norm) *norm = tmp2[j];
1918       }
1919       ierr = PetscFree(tmp);CHKERRQ(ierr);
1920       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1921     } else if (type == NORM_INFINITY) { /* max row norm */
1922       PetscReal ntemp = 0.0;
1923       for (j=0; j<aij->A->rmap->n; j++) {
1924         v   = amat->a + amat->i[j];
1925         sum = 0.0;
1926         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1927           sum += PetscAbsScalar(*v); v++;
1928         }
1929         v = bmat->a + bmat->i[j];
1930         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1931           sum += PetscAbsScalar(*v); v++;
1932         }
1933         if (sum > ntemp) ntemp = sum;
1934       }
1935       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatTranspose_MPIAIJ"
1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1944 {
1945   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1946   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1947   PetscErrorCode ierr;
1948   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1949   PetscInt       cstart = A->cmap->rstart,ncol;
1950   Mat            B;
1951   MatScalar      *array;
1952 
1953   PetscFunctionBegin;
1954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1955 
1956   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1957   ai = Aloc->i; aj = Aloc->j;
1958   bi = Bloc->i; bj = Bloc->j;
1959   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1960     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1961     PetscSFNode          *oloc;
1962     PETSC_UNUSED PetscSF sf;
1963 
1964     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1965     /* compute d_nnz for preallocation */
1966     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) {
1968       d_nnz[aj[i]]++;
1969       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1970     }
1971     /* compute local off-diagonal contributions */
1972     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1973     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1974     /* map those to global */
1975     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1976     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1977     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1978     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1979     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1980     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1982 
1983     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1984     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1985     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1986     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1987     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1988     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1989   } else {
1990     B    = *matout;
1991     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1992     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1993   }
1994 
1995   /* copy over the A part */
1996   array = Aloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<ma; i++) {
1999     ncol = ai[i+1]-ai[i];
2000     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2001     row++;
2002     array += ncol; aj += ncol;
2003   }
2004   aj = Aloc->j;
2005   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2006 
2007   /* copy over the B part */
2008   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2009   array = Bloc->a;
2010   row   = A->rmap->rstart;
2011   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2012   cols_tmp = cols;
2013   for (i=0; i<mb; i++) {
2014     ncol = bi[i+1]-bi[i];
2015     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; cols_tmp += ncol;
2018   }
2019   ierr = PetscFree(cols);CHKERRQ(ierr);
2020 
2021   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2024     *matout = B;
2025   } else {
2026     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2034 {
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            a    = aij->A,b = aij->B;
2037   PetscErrorCode ierr;
2038   PetscInt       s1,s2,s3;
2039 
2040   PetscFunctionBegin;
2041   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2042   if (rr) {
2043     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2044     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2045     /* Overlap communication with computation. */
2046     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2047   }
2048   if (ll) {
2049     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2050     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2051     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2052   }
2053   /* scale  the diagonal block */
2054   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2055 
2056   if (rr) {
2057     /* Do a scatter end and then right scale the off-diagonal block */
2058     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2059     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2067 {
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   PetscErrorCode ierr;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatEqual_MPIAIJ"
2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2079 {
2080   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2081   Mat            a,b,c,d;
2082   PetscBool      flg;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   a = matA->A; b = matA->B;
2087   c = matB->A; d = matB->B;
2088 
2089   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2090   if (flg) {
2091     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2092   }
2093   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatCopy_MPIAIJ"
2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2100 {
2101   PetscErrorCode ierr;
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2104 
2105   PetscFunctionBegin;
2106   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2107   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2108     /* because of the column compression in the off-processor part of the matrix a->B,
2109        the number of columns in a->B and b->B may be different, hence we cannot call
2110        the MatCopy() directly on the two parts. If need be, we can provide a more
2111        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2112        then copying the submatrices */
2113     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2114   } else {
2115     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2116     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetUp_MPIAIJ"
2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2124 {
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2135 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2136 {
2137   PetscInt       i,m=Y->rmap->N;
2138   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2139   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2140   const PetscInt *xi = x->i,*yi = y->i;
2141 
2142   PetscFunctionBegin;
2143   /* Set the number of nonzeros in the new matrix */
2144   for (i=0; i<m; i++) {
2145     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2146     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2147     nnz[i] = 0;
2148     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2149       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2150       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2151       nnz[i]++;
2152     }
2153     for (; k<nzy; k++) nnz[i]++;
2154   }
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatAXPY_MPIAIJ"
2160 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2161 {
2162   PetscErrorCode ierr;
2163   PetscInt       i;
2164   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2165   PetscBLASInt   bnz,one=1;
2166   Mat_SeqAIJ     *x,*y;
2167 
2168   PetscFunctionBegin;
2169   if (str == SAME_NONZERO_PATTERN) {
2170     PetscScalar alpha = a;
2171     x    = (Mat_SeqAIJ*)xx->A->data;
2172     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2173     y    = (Mat_SeqAIJ*)yy->A->data;
2174     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2175     x    = (Mat_SeqAIJ*)xx->B->data;
2176     y    = (Mat_SeqAIJ*)yy->B->data;
2177     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2178     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2179     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2180   } else if (str == SUBSET_NONZERO_PATTERN) {
2181     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2182 
2183     x = (Mat_SeqAIJ*)xx->B->data;
2184     y = (Mat_SeqAIJ*)yy->B->data;
2185     if (y->xtoy && y->XtoY != xx->B) {
2186       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2187       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2188     }
2189     if (!y->xtoy) { /* get xtoy */
2190       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2191       y->XtoY = xx->B;
2192       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2193     }
2194     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2195     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2196   } else {
2197     Mat      B;
2198     PetscInt *nnz_d,*nnz_o;
2199     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2200     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2201     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2202     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2203     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2204     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2205     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2206     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2207     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2208     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2209     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2210     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2211     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2212     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatConjugate_MPIAIJ"
2221 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2222 {
2223 #if defined(PETSC_USE_COMPLEX)
2224   PetscErrorCode ierr;
2225   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2226 
2227   PetscFunctionBegin;
2228   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2229   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2230 #else
2231   PetscFunctionBegin;
2232 #endif
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatRealPart_MPIAIJ"
2238 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2239 {
2240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2241   PetscErrorCode ierr;
2242 
2243   PetscFunctionBegin;
2244   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2245   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2251 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2252 {
2253   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2254   PetscErrorCode ierr;
2255 
2256   PetscFunctionBegin;
2257   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2258   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 #if defined(PETSC_HAVE_PBGL)
2263 
2264 #include <boost/parallel/mpi/bsp_process_group.hpp>
2265 #include <boost/graph/distributed/ilu_default_graph.hpp>
2266 #include <boost/graph/distributed/ilu_0_block.hpp>
2267 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2268 #include <boost/graph/distributed/petsc/interface.hpp>
2269 #include <boost/multi_array.hpp>
2270 #include <boost/parallel/distributed_property_map->hpp>
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2274 /*
2275   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2276 */
2277 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2278 {
2279   namespace petsc = boost::distributed::petsc;
2280 
2281   namespace graph_dist = boost::graph::distributed;
2282   using boost::graph::distributed::ilu_default::process_group_type;
2283   using boost::graph::ilu_permuted;
2284 
2285   PetscBool      row_identity, col_identity;
2286   PetscContainer c;
2287   PetscInt       m, n, M, N;
2288   PetscErrorCode ierr;
2289 
2290   PetscFunctionBegin;
2291   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2292   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2293   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2294   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2295 
2296   process_group_type pg;
2297   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2298   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2299   lgraph_type& level_graph = *lgraph_p;
2300   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2301 
2302   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2303   ilu_permuted(level_graph);
2304 
2305   /* put together the new matrix */
2306   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2307   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2308   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2309   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2310   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2311   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2312   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2314 
2315   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2316   ierr = PetscContainerSetPointer(c, lgraph_p);
2317   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2318   ierr = PetscContainerDestroy(&c);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2324 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2325 {
2326   PetscFunctionBegin;
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatSolve_MPIAIJ"
2332 /*
2333   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2334 */
2335 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2336 {
2337   namespace graph_dist = boost::graph::distributed;
2338 
2339   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2340   lgraph_type    *lgraph_p;
2341   PetscContainer c;
2342   PetscErrorCode ierr;
2343 
2344   PetscFunctionBegin;
2345   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2346   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2347   ierr = VecCopy(b, x);CHKERRQ(ierr);
2348 
2349   PetscScalar *array_x;
2350   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2351   PetscInt sx;
2352   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2353 
2354   PetscScalar *array_b;
2355   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2356   PetscInt sb;
2357   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2358 
2359   lgraph_type& level_graph = *lgraph_p;
2360   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2361 
2362   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2363   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2364   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2365 
2366   typedef boost::iterator_property_map<array_ref_type::iterator,
2367                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2368   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2369   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2370 
2371   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2372   PetscFunctionReturn(0);
2373 }
2374 #endif
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatDestroy_MatRedundant"
2378 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2379 {
2380   PetscErrorCode ierr;
2381   Mat_Redundant  *redund;
2382   PetscInt       i;
2383   PetscMPIInt    size;
2384 
2385   PetscFunctionBegin;
2386   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2387   if (size == 1) {
2388     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2389     redund = a->redundant;
2390   } else {
2391     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2392     redund = a->redundant;
2393   }
2394   if (redund){
2395     if (redund->matseq) { /* via MatGetSubMatrices()  */
2396       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2397       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2398       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2399       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2400     } else {
2401       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2402       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2403       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2404       for (i=0; i<redund->nrecvs; i++) {
2405         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2406         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2407       }
2408       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2409     }
2410 
2411     if (redund->psubcomm) {
2412       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2413     }
2414     ierr = redund->Destroy(A);CHKERRQ(ierr);
2415     ierr = PetscFree(redund);CHKERRQ(ierr);
2416   }
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2422 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2423 {
2424   PetscMPIInt    rank,size;
2425   MPI_Comm       comm;
2426   PetscErrorCode ierr;
2427   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2428   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2429   PetscInt       *rowrange = mat->rmap->range;
2430   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2431   Mat            A = aij->A,B=aij->B,C=*matredundant;
2432   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2433   PetscScalar    *sbuf_a;
2434   PetscInt       nzlocal=a->nz+b->nz;
2435   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2436   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2437   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2438   MatScalar      *aworkA,*aworkB;
2439   PetscScalar    *vals;
2440   PetscMPIInt    tag1,tag2,tag3,imdex;
2441   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2442   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2443   MPI_Status     recv_status,*send_status;
2444   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2445   PetscInt       **rbuf_j=NULL;
2446   PetscScalar    **rbuf_a=NULL;
2447   Mat_Redundant  *redund =NULL;
2448 
2449   PetscFunctionBegin;
2450   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2452   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2453   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2454   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2455 
2456   if (reuse == MAT_REUSE_MATRIX) {
2457     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2458     if (subsize == 1) {
2459       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2460       redund = c->redundant;
2461     } else {
2462       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2463       redund = c->redundant;
2464     }
2465     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2466 
2467     nsends    = redund->nsends;
2468     nrecvs    = redund->nrecvs;
2469     send_rank = redund->send_rank;
2470     recv_rank = redund->recv_rank;
2471     sbuf_nz   = redund->sbuf_nz;
2472     rbuf_nz   = redund->rbuf_nz;
2473     sbuf_j    = redund->sbuf_j;
2474     sbuf_a    = redund->sbuf_a;
2475     rbuf_j    = redund->rbuf_j;
2476     rbuf_a    = redund->rbuf_a;
2477   }
2478 
2479   if (reuse == MAT_INITIAL_MATRIX) {
2480     PetscInt    nleftover,np_subcomm;
2481 
2482     /* get the destination processors' id send_rank, nsends and nrecvs */
2483     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2484 
2485     np_subcomm = size/nsubcomm;
2486     nleftover  = size - nsubcomm*np_subcomm;
2487 
2488     /* block of codes below is specific for INTERLACED */
2489     /* ------------------------------------------------*/
2490     nsends = 0; nrecvs = 0;
2491     for (i=0; i<size; i++) {
2492       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2493         send_rank[nsends++] = i;
2494         recv_rank[nrecvs++] = i;
2495       }
2496     }
2497     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2498       i = size-nleftover-1;
2499       j = 0;
2500       while (j < nsubcomm - nleftover) {
2501         send_rank[nsends++] = i;
2502         i--; j++;
2503       }
2504     }
2505 
2506     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2507       for (i=0; i<nleftover; i++) {
2508         recv_rank[nrecvs++] = size-nleftover+i;
2509       }
2510     }
2511     /*----------------------------------------------*/
2512 
2513     /* allocate sbuf_j, sbuf_a */
2514     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2515     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2516     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2517     /*
2518     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2519     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2520      */
2521   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2522 
2523   /* copy mat's local entries into the buffers */
2524   if (reuse == MAT_INITIAL_MATRIX) {
2525     rownz_max = 0;
2526     rptr      = sbuf_j;
2527     cols      = sbuf_j + rend-rstart + 1;
2528     vals      = sbuf_a;
2529     rptr[0]   = 0;
2530     for (i=0; i<rend-rstart; i++) {
2531       row    = i + rstart;
2532       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2533       ncols  = nzA + nzB;
2534       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2535       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2536       /* load the column indices for this row into cols */
2537       lwrite = 0;
2538       for (l=0; l<nzB; l++) {
2539         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2540           vals[lwrite]   = aworkB[l];
2541           cols[lwrite++] = ctmp;
2542         }
2543       }
2544       for (l=0; l<nzA; l++) {
2545         vals[lwrite]   = aworkA[l];
2546         cols[lwrite++] = cstart + cworkA[l];
2547       }
2548       for (l=0; l<nzB; l++) {
2549         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2550           vals[lwrite]   = aworkB[l];
2551           cols[lwrite++] = ctmp;
2552         }
2553       }
2554       vals     += ncols;
2555       cols     += ncols;
2556       rptr[i+1] = rptr[i] + ncols;
2557       if (rownz_max < ncols) rownz_max = ncols;
2558     }
2559     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2560   } else { /* only copy matrix values into sbuf_a */
2561     rptr    = sbuf_j;
2562     vals    = sbuf_a;
2563     rptr[0] = 0;
2564     for (i=0; i<rend-rstart; i++) {
2565       row    = i + rstart;
2566       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2567       ncols  = nzA + nzB;
2568       cworkB = b->j + b->i[i];
2569       aworkA = a->a + a->i[i];
2570       aworkB = b->a + b->i[i];
2571       lwrite = 0;
2572       for (l=0; l<nzB; l++) {
2573         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2574       }
2575       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2576       for (l=0; l<nzB; l++) {
2577         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2578       }
2579       vals     += ncols;
2580       rptr[i+1] = rptr[i] + ncols;
2581     }
2582   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2583 
2584   /* send nzlocal to others, and recv other's nzlocal */
2585   /*--------------------------------------------------*/
2586   if (reuse == MAT_INITIAL_MATRIX) {
2587     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2588 
2589     s_waits2 = s_waits3 + nsends;
2590     s_waits1 = s_waits2 + nsends;
2591     r_waits1 = s_waits1 + nsends;
2592     r_waits2 = r_waits1 + nrecvs;
2593     r_waits3 = r_waits2 + nrecvs;
2594   } else {
2595     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2596 
2597     r_waits3 = s_waits3 + nsends;
2598   }
2599 
2600   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2601   if (reuse == MAT_INITIAL_MATRIX) {
2602     /* get new tags to keep the communication clean */
2603     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2604     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2605     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2606 
2607     /* post receives of other's nzlocal */
2608     for (i=0; i<nrecvs; i++) {
2609       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2610     }
2611     /* send nzlocal to others */
2612     for (i=0; i<nsends; i++) {
2613       sbuf_nz[i] = nzlocal;
2614       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2615     }
2616     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2617     count = nrecvs;
2618     while (count) {
2619       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2620 
2621       recv_rank[imdex] = recv_status.MPI_SOURCE;
2622       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2623       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2624 
2625       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2626 
2627       rbuf_nz[imdex] += i + 2;
2628 
2629       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2630       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2631       count--;
2632     }
2633     /* wait on sends of nzlocal */
2634     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2635     /* send mat->i,j to others, and recv from other's */
2636     /*------------------------------------------------*/
2637     for (i=0; i<nsends; i++) {
2638       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2639       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2640     }
2641     /* wait on receives of mat->i,j */
2642     /*------------------------------*/
2643     count = nrecvs;
2644     while (count) {
2645       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2646       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2647       count--;
2648     }
2649     /* wait on sends of mat->i,j */
2650     /*---------------------------*/
2651     if (nsends) {
2652       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2653     }
2654   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2655 
2656   /* post receives, send and receive mat->a */
2657   /*----------------------------------------*/
2658   for (imdex=0; imdex<nrecvs; imdex++) {
2659     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2660   }
2661   for (i=0; i<nsends; i++) {
2662     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2663   }
2664   count = nrecvs;
2665   while (count) {
2666     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2667     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2668     count--;
2669   }
2670   if (nsends) {
2671     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2672   }
2673 
2674   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2675 
2676   /* create redundant matrix */
2677   /*-------------------------*/
2678   if (reuse == MAT_INITIAL_MATRIX) {
2679     const PetscInt *range;
2680     PetscInt       rstart_sub,rend_sub,mloc_sub;
2681 
2682     /* compute rownz_max for preallocation */
2683     for (imdex=0; imdex<nrecvs; imdex++) {
2684       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2685       rptr = rbuf_j[imdex];
2686       for (i=0; i<j; i++) {
2687         ncols = rptr[i+1] - rptr[i];
2688         if (rownz_max < ncols) rownz_max = ncols;
2689       }
2690     }
2691 
2692     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2693 
2694     /* get local size of redundant matrix
2695        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2696     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2697     rstart_sub = range[nsubcomm*subrank];
2698     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2699       rend_sub = range[nsubcomm*(subrank+1)];
2700     } else {
2701       rend_sub = mat->rmap->N;
2702     }
2703     mloc_sub = rend_sub - rstart_sub;
2704 
2705     if (M == N) {
2706       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2707     } else { /* non-square matrix */
2708       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2709     }
2710     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2711     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2712     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2713     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2714   } else {
2715     C = *matredundant;
2716   }
2717 
2718   /* insert local matrix entries */
2719   rptr = sbuf_j;
2720   cols = sbuf_j + rend-rstart + 1;
2721   vals = sbuf_a;
2722   for (i=0; i<rend-rstart; i++) {
2723     row   = i + rstart;
2724     ncols = rptr[i+1] - rptr[i];
2725     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2726     vals += ncols;
2727     cols += ncols;
2728   }
2729   /* insert received matrix entries */
2730   for (imdex=0; imdex<nrecvs; imdex++) {
2731     rstart = rowrange[recv_rank[imdex]];
2732     rend   = rowrange[recv_rank[imdex]+1];
2733     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2734     rptr   = rbuf_j[imdex];
2735     cols   = rbuf_j[imdex] + rend-rstart + 1;
2736     vals   = rbuf_a[imdex];
2737     for (i=0; i<rend-rstart; i++) {
2738       row   = i + rstart;
2739       ncols = rptr[i+1] - rptr[i];
2740       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2741       vals += ncols;
2742       cols += ncols;
2743     }
2744   }
2745   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747 
2748   if (reuse == MAT_INITIAL_MATRIX) {
2749     *matredundant = C;
2750 
2751     /* create a supporting struct and attach it to C for reuse */
2752     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2753     if (subsize == 1) {
2754       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2755       c->redundant = redund;
2756     } else {
2757       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2758       c->redundant = redund;
2759     }
2760 
2761     redund->nzlocal   = nzlocal;
2762     redund->nsends    = nsends;
2763     redund->nrecvs    = nrecvs;
2764     redund->send_rank = send_rank;
2765     redund->recv_rank = recv_rank;
2766     redund->sbuf_nz   = sbuf_nz;
2767     redund->rbuf_nz   = rbuf_nz;
2768     redund->sbuf_j    = sbuf_j;
2769     redund->sbuf_a    = sbuf_a;
2770     redund->rbuf_j    = rbuf_j;
2771     redund->rbuf_a    = rbuf_a;
2772     redund->psubcomm  = NULL;
2773 
2774     redund->Destroy = C->ops->destroy;
2775     C->ops->destroy = MatDestroy_MatRedundant;
2776   }
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2782 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2783 {
2784   PetscErrorCode ierr;
2785   MPI_Comm       comm;
2786   PetscMPIInt    size,subsize;
2787   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2788   Mat_Redundant  *redund=NULL;
2789   PetscSubcomm   psubcomm=NULL;
2790   MPI_Comm       subcomm_in=subcomm;
2791   Mat            *matseq;
2792   IS             isrow,iscol;
2793 
2794   PetscFunctionBegin;
2795   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2796     if (reuse ==  MAT_INITIAL_MATRIX) {
2797       /* create psubcomm, then get subcomm */
2798       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2799       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2800       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2801 
2802       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2803       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2804       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2805       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2806       subcomm = psubcomm->comm;
2807     } else { /* retrieve psubcomm and subcomm */
2808       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2809       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2810       if (subsize == 1) {
2811         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2812         redund = c->redundant;
2813       } else {
2814         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2815         redund = c->redundant;
2816       }
2817       psubcomm = redund->psubcomm;
2818     }
2819     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2820       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2821       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2822         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2823         if (subsize == 1) {
2824           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2825           c->redundant->psubcomm = psubcomm;
2826         } else {
2827           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2828           c->redundant->psubcomm = psubcomm ;
2829         }
2830       }
2831       PetscFunctionReturn(0);
2832     }
2833   }
2834 
2835   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2836   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2837   if (reuse == MAT_INITIAL_MATRIX) {
2838     /* create a local sequential matrix matseq[0] */
2839     mloc_sub = PETSC_DECIDE;
2840     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2841     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2842     rstart = rend - mloc_sub;
2843     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2844     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2845   } else { /* reuse == MAT_REUSE_MATRIX */
2846     if (subsize == 1) {
2847       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2848       redund = c->redundant;
2849     } else {
2850       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2851       redund = c->redundant;
2852     }
2853 
2854     isrow  = redund->isrow;
2855     iscol  = redund->iscol;
2856     matseq = redund->matseq;
2857   }
2858   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2859   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2860 
2861   if (reuse == MAT_INITIAL_MATRIX) {
2862     /* create a supporting struct and attach it to C for reuse */
2863     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2864     if (subsize == 1) {
2865       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2866       c->redundant = redund;
2867     } else {
2868       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2869       c->redundant = redund;
2870     }
2871     redund->isrow    = isrow;
2872     redund->iscol    = iscol;
2873     redund->matseq   = matseq;
2874     redund->psubcomm = psubcomm;
2875     redund->Destroy               = (*matredundant)->ops->destroy;
2876     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2877   }
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2883 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2884 {
2885   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2886   PetscErrorCode ierr;
2887   PetscInt       i,*idxb = 0;
2888   PetscScalar    *va,*vb;
2889   Vec            vtmp;
2890 
2891   PetscFunctionBegin;
2892   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2893   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2894   if (idx) {
2895     for (i=0; i<A->rmap->n; i++) {
2896       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2897     }
2898   }
2899 
2900   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2901   if (idx) {
2902     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2903   }
2904   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2905   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2906 
2907   for (i=0; i<A->rmap->n; i++) {
2908     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2909       va[i] = vb[i];
2910       if (idx) idx[i] = a->garray[idxb[i]];
2911     }
2912   }
2913 
2914   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2916   ierr = PetscFree(idxb);CHKERRQ(ierr);
2917   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2923 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2926   PetscErrorCode ierr;
2927   PetscInt       i,*idxb = 0;
2928   PetscScalar    *va,*vb;
2929   Vec            vtmp;
2930 
2931   PetscFunctionBegin;
2932   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2933   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2934   if (idx) {
2935     for (i=0; i<A->cmap->n; i++) {
2936       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2937     }
2938   }
2939 
2940   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2941   if (idx) {
2942     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2943   }
2944   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2945   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2946 
2947   for (i=0; i<A->rmap->n; i++) {
2948     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2949       va[i] = vb[i];
2950       if (idx) idx[i] = a->garray[idxb[i]];
2951     }
2952   }
2953 
2954   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2955   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2956   ierr = PetscFree(idxb);CHKERRQ(ierr);
2957   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2963 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964 {
2965   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2966   PetscInt       n      = A->rmap->n;
2967   PetscInt       cstart = A->cmap->rstart;
2968   PetscInt       *cmap  = mat->garray;
2969   PetscInt       *diagIdx, *offdiagIdx;
2970   Vec            diagV, offdiagV;
2971   PetscScalar    *a, *diagA, *offdiagA;
2972   PetscInt       r;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2977   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2979   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2980   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2981   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   for (r = 0; r < n; ++r) {
2985     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2986       a[r]   = diagA[r];
2987       idx[r] = cstart + diagIdx[r];
2988     } else {
2989       a[r]   = offdiagA[r];
2990       idx[r] = cmap[offdiagIdx[r]];
2991     }
2992   }
2993   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2997   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2998   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3004 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3005 {
3006   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3007   PetscInt       n      = A->rmap->n;
3008   PetscInt       cstart = A->cmap->rstart;
3009   PetscInt       *cmap  = mat->garray;
3010   PetscInt       *diagIdx, *offdiagIdx;
3011   Vec            diagV, offdiagV;
3012   PetscScalar    *a, *diagA, *offdiagA;
3013   PetscInt       r;
3014   PetscErrorCode ierr;
3015 
3016   PetscFunctionBegin;
3017   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3018   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3019   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3020   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3021   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3022   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3023   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3024   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3025   for (r = 0; r < n; ++r) {
3026     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3027       a[r]   = diagA[r];
3028       idx[r] = cstart + diagIdx[r];
3029     } else {
3030       a[r]   = offdiagA[r];
3031       idx[r] = cmap[offdiagIdx[r]];
3032     }
3033   }
3034   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3035   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3036   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3037   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3038   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3039   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3045 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3046 {
3047   PetscErrorCode ierr;
3048   Mat            *dummy;
3049 
3050   PetscFunctionBegin;
3051   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3052   *newmat = *dummy;
3053   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3059 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3060 {
3061   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3062   PetscErrorCode ierr;
3063 
3064   PetscFunctionBegin;
3065   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3071 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3072 {
3073   PetscErrorCode ierr;
3074   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3075 
3076   PetscFunctionBegin;
3077   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3078   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3079   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3080   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 /* -------------------------------------------------------------------*/
3085 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3086                                        MatGetRow_MPIAIJ,
3087                                        MatRestoreRow_MPIAIJ,
3088                                        MatMult_MPIAIJ,
3089                                 /* 4*/ MatMultAdd_MPIAIJ,
3090                                        MatMultTranspose_MPIAIJ,
3091                                        MatMultTransposeAdd_MPIAIJ,
3092 #if defined(PETSC_HAVE_PBGL)
3093                                        MatSolve_MPIAIJ,
3094 #else
3095                                        0,
3096 #endif
3097                                        0,
3098                                        0,
3099                                 /*10*/ 0,
3100                                        0,
3101                                        0,
3102                                        MatSOR_MPIAIJ,
3103                                        MatTranspose_MPIAIJ,
3104                                 /*15*/ MatGetInfo_MPIAIJ,
3105                                        MatEqual_MPIAIJ,
3106                                        MatGetDiagonal_MPIAIJ,
3107                                        MatDiagonalScale_MPIAIJ,
3108                                        MatNorm_MPIAIJ,
3109                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3110                                        MatAssemblyEnd_MPIAIJ,
3111                                        MatSetOption_MPIAIJ,
3112                                        MatZeroEntries_MPIAIJ,
3113                                 /*24*/ MatZeroRows_MPIAIJ,
3114                                        0,
3115 #if defined(PETSC_HAVE_PBGL)
3116                                        0,
3117 #else
3118                                        0,
3119 #endif
3120                                        0,
3121                                        0,
3122                                 /*29*/ MatSetUp_MPIAIJ,
3123 #if defined(PETSC_HAVE_PBGL)
3124                                        0,
3125 #else
3126                                        0,
3127 #endif
3128                                        0,
3129                                        0,
3130                                        0,
3131                                 /*34*/ MatDuplicate_MPIAIJ,
3132                                        0,
3133                                        0,
3134                                        0,
3135                                        0,
3136                                 /*39*/ MatAXPY_MPIAIJ,
3137                                        MatGetSubMatrices_MPIAIJ,
3138                                        MatIncreaseOverlap_MPIAIJ,
3139                                        MatGetValues_MPIAIJ,
3140                                        MatCopy_MPIAIJ,
3141                                 /*44*/ MatGetRowMax_MPIAIJ,
3142                                        MatScale_MPIAIJ,
3143                                        0,
3144                                        0,
3145                                        MatZeroRowsColumns_MPIAIJ,
3146                                 /*49*/ MatSetRandom_MPIAIJ,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                        0,
3151                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3152                                        0,
3153                                        MatSetUnfactored_MPIAIJ,
3154                                        MatPermute_MPIAIJ,
3155                                        0,
3156                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3157                                        MatDestroy_MPIAIJ,
3158                                        MatView_MPIAIJ,
3159                                        0,
3160                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3161                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3162                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                        0,
3166                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3167                                        MatGetRowMinAbs_MPIAIJ,
3168                                        0,
3169                                        MatSetColoring_MPIAIJ,
3170                                        0,
3171                                        MatSetValuesAdifor_MPIAIJ,
3172                                 /*75*/ MatFDColoringApply_AIJ,
3173                                        0,
3174                                        0,
3175                                        0,
3176                                        MatFindZeroDiagonals_MPIAIJ,
3177                                 /*80*/ 0,
3178                                        0,
3179                                        0,
3180                                 /*83*/ MatLoad_MPIAIJ,
3181                                        0,
3182                                        0,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3187                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3188                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3189                                        MatPtAP_MPIAIJ_MPIAIJ,
3190                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3191                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                 /*99*/ 0,
3197                                        0,
3198                                        0,
3199                                        MatConjugate_MPIAIJ,
3200                                        0,
3201                                 /*104*/MatSetValuesRow_MPIAIJ,
3202                                        MatRealPart_MPIAIJ,
3203                                        MatImaginaryPart_MPIAIJ,
3204                                        0,
3205                                        0,
3206                                 /*109*/0,
3207                                        MatGetRedundantMatrix_MPIAIJ,
3208                                        MatGetRowMin_MPIAIJ,
3209                                        0,
3210                                        0,
3211                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3212                                        0,
3213                                        0,
3214                                        0,
3215                                        0,
3216                                 /*119*/0,
3217                                        0,
3218                                        0,
3219                                        0,
3220                                        MatGetMultiProcBlock_MPIAIJ,
3221                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3222                                        MatGetColumnNorms_MPIAIJ,
3223                                        MatInvertBlockDiagonal_MPIAIJ,
3224                                        0,
3225                                        MatGetSubMatricesParallel_MPIAIJ,
3226                                 /*129*/0,
3227                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3228                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3229                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3230                                        0,
3231                                 /*134*/0,
3232                                        0,
3233                                        0,
3234                                        0,
3235                                        0,
3236                                 /*139*/0,
3237                                        0,
3238                                        0,
3239                                        MatFDColoringSetUp_MPIXAIJ,
3240                                        MatFindOffBlockDiagonalEntries_MPIAIJ
3241 };
3242 
3243 /* ----------------------------------------------------------------------------------------*/
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3247 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3248 {
3249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3250   PetscErrorCode ierr;
3251 
3252   PetscFunctionBegin;
3253   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3254   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3260 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3261 {
3262   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3263   PetscErrorCode ierr;
3264 
3265   PetscFunctionBegin;
3266   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3267   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3273 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3274 {
3275   Mat_MPIAIJ     *b;
3276   PetscErrorCode ierr;
3277 
3278   PetscFunctionBegin;
3279   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3280   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3281   b = (Mat_MPIAIJ*)B->data;
3282 
3283   if (!B->preallocated) {
3284     /* Explicitly create 2 MATSEQAIJ matrices. */
3285     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3286     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3287     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3288     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3289     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3290     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3291     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3292     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3293     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3294     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3295   }
3296 
3297   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3298   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3299   B->preallocated = PETSC_TRUE;
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3305 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3306 {
3307   Mat            mat;
3308   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3309   PetscErrorCode ierr;
3310 
3311   PetscFunctionBegin;
3312   *newmat = 0;
3313   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3314   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3315   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3316   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3317   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3318   a       = (Mat_MPIAIJ*)mat->data;
3319 
3320   mat->factortype   = matin->factortype;
3321   mat->assembled    = PETSC_TRUE;
3322   mat->insertmode   = NOT_SET_VALUES;
3323   mat->preallocated = PETSC_TRUE;
3324 
3325   a->size         = oldmat->size;
3326   a->rank         = oldmat->rank;
3327   a->donotstash   = oldmat->donotstash;
3328   a->roworiented  = oldmat->roworiented;
3329   a->rowindices   = 0;
3330   a->rowvalues    = 0;
3331   a->getrowactive = PETSC_FALSE;
3332 
3333   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3334   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3335 
3336   if (oldmat->colmap) {
3337 #if defined(PETSC_USE_CTABLE)
3338     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3339 #else
3340     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3341     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3342     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3343 #endif
3344   } else a->colmap = 0;
3345   if (oldmat->garray) {
3346     PetscInt len;
3347     len  = oldmat->B->cmap->n;
3348     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3349     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3350     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3351   } else a->garray = 0;
3352 
3353   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3354   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3355   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3356   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3357   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3358   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3359   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3360   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3361   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3362   *newmat = mat;
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 
3367 
3368 #undef __FUNCT__
3369 #define __FUNCT__ "MatLoad_MPIAIJ"
3370 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3371 {
3372   PetscScalar    *vals,*svals;
3373   MPI_Comm       comm;
3374   PetscErrorCode ierr;
3375   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3376   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3377   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3378   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3379   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3380   int            fd;
3381   PetscInt       bs = 1;
3382 
3383   PetscFunctionBegin;
3384   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3385   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3386   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3387   if (!rank) {
3388     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3389     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3390     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3391   }
3392 
3393   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3394   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3395   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3396 
3397   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3398 
3399   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3400   M    = header[1]; N = header[2];
3401   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3402   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3403   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3404 
3405   /* If global sizes are set, check if they are consistent with that given in the file */
3406   if (sizesset) {
3407     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3408   }
3409   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3410   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3411 
3412   /* determine ownership of all (block) rows */
3413   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3414   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3415   else m = newMat->rmap->n; /* Set by user */
3416 
3417   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3418   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3419 
3420   /* First process needs enough room for process with most rows */
3421   if (!rank) {
3422     mmax = rowners[1];
3423     for (i=2; i<=size; i++) {
3424       mmax = PetscMax(mmax, rowners[i]);
3425     }
3426   } else mmax = -1;             /* unused, but compilers complain */
3427 
3428   rowners[0] = 0;
3429   for (i=2; i<=size; i++) {
3430     rowners[i] += rowners[i-1];
3431   }
3432   rstart = rowners[rank];
3433   rend   = rowners[rank+1];
3434 
3435   /* distribute row lengths to all processors */
3436   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3437   if (!rank) {
3438     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3439     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3440     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3441     for (j=0; j<m; j++) {
3442       procsnz[0] += ourlens[j];
3443     }
3444     for (i=1; i<size; i++) {
3445       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3446       /* calculate the number of nonzeros on each processor */
3447       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3448         procsnz[i] += rowlengths[j];
3449       }
3450       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3451     }
3452     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3453   } else {
3454     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3455   }
3456 
3457   if (!rank) {
3458     /* determine max buffer needed and allocate it */
3459     maxnz = 0;
3460     for (i=0; i<size; i++) {
3461       maxnz = PetscMax(maxnz,procsnz[i]);
3462     }
3463     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3464 
3465     /* read in my part of the matrix column indices  */
3466     nz   = procsnz[0];
3467     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3468     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3469 
3470     /* read in every one elses and ship off */
3471     for (i=1; i<size; i++) {
3472       nz   = procsnz[i];
3473       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3474       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3475     }
3476     ierr = PetscFree(cols);CHKERRQ(ierr);
3477   } else {
3478     /* determine buffer space needed for message */
3479     nz = 0;
3480     for (i=0; i<m; i++) {
3481       nz += ourlens[i];
3482     }
3483     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3484 
3485     /* receive message of column indices*/
3486     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3487   }
3488 
3489   /* determine column ownership if matrix is not square */
3490   if (N != M) {
3491     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3492     else n = newMat->cmap->n;
3493     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3494     cstart = cend - n;
3495   } else {
3496     cstart = rstart;
3497     cend   = rend;
3498     n      = cend - cstart;
3499   }
3500 
3501   /* loop over local rows, determining number of off diagonal entries */
3502   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3503   jj   = 0;
3504   for (i=0; i<m; i++) {
3505     for (j=0; j<ourlens[i]; j++) {
3506       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3507       jj++;
3508     }
3509   }
3510 
3511   for (i=0; i<m; i++) {
3512     ourlens[i] -= offlens[i];
3513   }
3514   if (!sizesset) {
3515     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3516   }
3517 
3518   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3519 
3520   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3521 
3522   for (i=0; i<m; i++) {
3523     ourlens[i] += offlens[i];
3524   }
3525 
3526   if (!rank) {
3527     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3528 
3529     /* read in my part of the matrix numerical values  */
3530     nz   = procsnz[0];
3531     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3532 
3533     /* insert into matrix */
3534     jj      = rstart;
3535     smycols = mycols;
3536     svals   = vals;
3537     for (i=0; i<m; i++) {
3538       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3539       smycols += ourlens[i];
3540       svals   += ourlens[i];
3541       jj++;
3542     }
3543 
3544     /* read in other processors and ship out */
3545     for (i=1; i<size; i++) {
3546       nz   = procsnz[i];
3547       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3548       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3549     }
3550     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3551   } else {
3552     /* receive numeric values */
3553     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3554 
3555     /* receive message of values*/
3556     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3557 
3558     /* insert into matrix */
3559     jj      = rstart;
3560     smycols = mycols;
3561     svals   = vals;
3562     for (i=0; i<m; i++) {
3563       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3564       smycols += ourlens[i];
3565       svals   += ourlens[i];
3566       jj++;
3567     }
3568   }
3569   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3570   ierr = PetscFree(vals);CHKERRQ(ierr);
3571   ierr = PetscFree(mycols);CHKERRQ(ierr);
3572   ierr = PetscFree(rowners);CHKERRQ(ierr);
3573   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3574   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 #undef __FUNCT__
3579 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3580 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3581 {
3582   PetscErrorCode ierr;
3583   IS             iscol_local;
3584   PetscInt       csize;
3585 
3586   PetscFunctionBegin;
3587   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3588   if (call == MAT_REUSE_MATRIX) {
3589     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3590     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3591   } else {
3592     PetscInt cbs;
3593     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3594     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3595     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3596   }
3597   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3598   if (call == MAT_INITIAL_MATRIX) {
3599     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3600     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3601   }
3602   PetscFunctionReturn(0);
3603 }
3604 
3605 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3606 #undef __FUNCT__
3607 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3608 /*
3609     Not great since it makes two copies of the submatrix, first an SeqAIJ
3610   in local and then by concatenating the local matrices the end result.
3611   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3612 
3613   Note: This requires a sequential iscol with all indices.
3614 */
3615 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3616 {
3617   PetscErrorCode ierr;
3618   PetscMPIInt    rank,size;
3619   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3620   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3621   PetscBool      allcolumns, colflag;
3622   Mat            M,Mreuse;
3623   MatScalar      *vwork,*aa;
3624   MPI_Comm       comm;
3625   Mat_SeqAIJ     *aij;
3626 
3627   PetscFunctionBegin;
3628   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3629   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3631 
3632   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3633   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3634   if (colflag && ncol == mat->cmap->N) {
3635     allcolumns = PETSC_TRUE;
3636   } else {
3637     allcolumns = PETSC_FALSE;
3638   }
3639   if (call ==  MAT_REUSE_MATRIX) {
3640     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3641     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3642     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3643   } else {
3644     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3645   }
3646 
3647   /*
3648       m - number of local rows
3649       n - number of columns (same on all processors)
3650       rstart - first row in new global matrix generated
3651   */
3652   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3653   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3654   if (call == MAT_INITIAL_MATRIX) {
3655     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3656     ii  = aij->i;
3657     jj  = aij->j;
3658 
3659     /*
3660         Determine the number of non-zeros in the diagonal and off-diagonal
3661         portions of the matrix in order to do correct preallocation
3662     */
3663 
3664     /* first get start and end of "diagonal" columns */
3665     if (csize == PETSC_DECIDE) {
3666       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3667       if (mglobal == n) { /* square matrix */
3668         nlocal = m;
3669       } else {
3670         nlocal = n/size + ((n % size) > rank);
3671       }
3672     } else {
3673       nlocal = csize;
3674     }
3675     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3676     rstart = rend - nlocal;
3677     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3678 
3679     /* next, compute all the lengths */
3680     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3681     olens = dlens + m;
3682     for (i=0; i<m; i++) {
3683       jend = ii[i+1] - ii[i];
3684       olen = 0;
3685       dlen = 0;
3686       for (j=0; j<jend; j++) {
3687         if (*jj < rstart || *jj >= rend) olen++;
3688         else dlen++;
3689         jj++;
3690       }
3691       olens[i] = olen;
3692       dlens[i] = dlen;
3693     }
3694     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3695     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3696     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3697     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3698     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3699     ierr = PetscFree(dlens);CHKERRQ(ierr);
3700   } else {
3701     PetscInt ml,nl;
3702 
3703     M    = *newmat;
3704     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3705     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3706     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3707     /*
3708          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3709        rather than the slower MatSetValues().
3710     */
3711     M->was_assembled = PETSC_TRUE;
3712     M->assembled     = PETSC_FALSE;
3713   }
3714   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3715   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3716   ii   = aij->i;
3717   jj   = aij->j;
3718   aa   = aij->a;
3719   for (i=0; i<m; i++) {
3720     row   = rstart + i;
3721     nz    = ii[i+1] - ii[i];
3722     cwork = jj;     jj += nz;
3723     vwork = aa;     aa += nz;
3724     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3725   }
3726 
3727   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3728   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3729   *newmat = M;
3730 
3731   /* save submatrix used in processor for next request */
3732   if (call ==  MAT_INITIAL_MATRIX) {
3733     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3734     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3735   }
3736   PetscFunctionReturn(0);
3737 }
3738 
3739 #undef __FUNCT__
3740 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3741 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3742 {
3743   PetscInt       m,cstart, cend,j,nnz,i,d;
3744   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3745   const PetscInt *JJ;
3746   PetscScalar    *values;
3747   PetscErrorCode ierr;
3748 
3749   PetscFunctionBegin;
3750   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3751 
3752   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3753   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3754   m      = B->rmap->n;
3755   cstart = B->cmap->rstart;
3756   cend   = B->cmap->rend;
3757   rstart = B->rmap->rstart;
3758 
3759   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3760 
3761 #if defined(PETSC_USE_DEBUGGING)
3762   for (i=0; i<m; i++) {
3763     nnz = Ii[i+1]- Ii[i];
3764     JJ  = J + Ii[i];
3765     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3766     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3767     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3768   }
3769 #endif
3770 
3771   for (i=0; i<m; i++) {
3772     nnz     = Ii[i+1]- Ii[i];
3773     JJ      = J + Ii[i];
3774     nnz_max = PetscMax(nnz_max,nnz);
3775     d       = 0;
3776     for (j=0; j<nnz; j++) {
3777       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3778     }
3779     d_nnz[i] = d;
3780     o_nnz[i] = nnz - d;
3781   }
3782   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3783   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3784 
3785   if (v) values = (PetscScalar*)v;
3786   else {
3787     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3788   }
3789 
3790   for (i=0; i<m; i++) {
3791     ii   = i + rstart;
3792     nnz  = Ii[i+1]- Ii[i];
3793     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3794   }
3795   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797 
3798   if (!v) {
3799     ierr = PetscFree(values);CHKERRQ(ierr);
3800   }
3801   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3807 /*@
3808    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3809    (the default parallel PETSc format).
3810 
3811    Collective on MPI_Comm
3812 
3813    Input Parameters:
3814 +  B - the matrix
3815 .  i - the indices into j for the start of each local row (starts with zero)
3816 .  j - the column indices for each local row (starts with zero)
3817 -  v - optional values in the matrix
3818 
3819    Level: developer
3820 
3821    Notes:
3822        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3823      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3824      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3825 
3826        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3827 
3828        The format which is used for the sparse matrix input, is equivalent to a
3829     row-major ordering.. i.e for the following matrix, the input data expected is
3830     as shown:
3831 
3832         1 0 0
3833         2 0 3     P0
3834        -------
3835         4 5 6     P1
3836 
3837      Process0 [P0]: rows_owned=[0,1]
3838         i =  {0,1,3}  [size = nrow+1  = 2+1]
3839         j =  {0,0,2}  [size = nz = 6]
3840         v =  {1,2,3}  [size = nz = 6]
3841 
3842      Process1 [P1]: rows_owned=[2]
3843         i =  {0,3}    [size = nrow+1  = 1+1]
3844         j =  {0,1,2}  [size = nz = 6]
3845         v =  {4,5,6}  [size = nz = 6]
3846 
3847 .keywords: matrix, aij, compressed row, sparse, parallel
3848 
3849 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3850           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3851 @*/
3852 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3853 {
3854   PetscErrorCode ierr;
3855 
3856   PetscFunctionBegin;
3857   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 #undef __FUNCT__
3862 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3863 /*@C
3864    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3865    (the default parallel PETSc format).  For good matrix assembly performance
3866    the user should preallocate the matrix storage by setting the parameters
3867    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3868    performance can be increased by more than a factor of 50.
3869 
3870    Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +  A - the matrix
3874 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3875            (same value is used for all local rows)
3876 .  d_nnz - array containing the number of nonzeros in the various rows of the
3877            DIAGONAL portion of the local submatrix (possibly different for each row)
3878            or NULL, if d_nz is used to specify the nonzero structure.
3879            The size of this array is equal to the number of local rows, i.e 'm'.
3880            For matrices that will be factored, you must leave room for (and set)
3881            the diagonal entry even if it is zero.
3882 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3883            submatrix (same value is used for all local rows).
3884 -  o_nnz - array containing the number of nonzeros in the various rows of the
3885            OFF-DIAGONAL portion of the local submatrix (possibly different for
3886            each row) or NULL, if o_nz is used to specify the nonzero
3887            structure. The size of this array is equal to the number
3888            of local rows, i.e 'm'.
3889 
3890    If the *_nnz parameter is given then the *_nz parameter is ignored
3891 
3892    The AIJ format (also called the Yale sparse matrix format or
3893    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3894    storage.  The stored row and column indices begin with zero.
3895    See Users-Manual: ch_mat for details.
3896 
3897    The parallel matrix is partitioned such that the first m0 rows belong to
3898    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3899    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3900 
3901    The DIAGONAL portion of the local submatrix of a processor can be defined
3902    as the submatrix which is obtained by extraction the part corresponding to
3903    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3904    first row that belongs to the processor, r2 is the last row belonging to
3905    the this processor, and c1-c2 is range of indices of the local part of a
3906    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3907    common case of a square matrix, the row and column ranges are the same and
3908    the DIAGONAL part is also square. The remaining portion of the local
3909    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3910 
3911    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3912 
3913    You can call MatGetInfo() to get information on how effective the preallocation was;
3914    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3915    You can also run with the option -info and look for messages with the string
3916    malloc in them to see if additional memory allocation was needed.
3917 
3918    Example usage:
3919 
3920    Consider the following 8x8 matrix with 34 non-zero values, that is
3921    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3922    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3923    as follows:
3924 
3925 .vb
3926             1  2  0  |  0  3  0  |  0  4
3927     Proc0   0  5  6  |  7  0  0  |  8  0
3928             9  0 10  | 11  0  0  | 12  0
3929     -------------------------------------
3930            13  0 14  | 15 16 17  |  0  0
3931     Proc1   0 18  0  | 19 20 21  |  0  0
3932             0  0  0  | 22 23  0  | 24  0
3933     -------------------------------------
3934     Proc2  25 26 27  |  0  0 28  | 29  0
3935            30  0  0  | 31 32 33  |  0 34
3936 .ve
3937 
3938    This can be represented as a collection of submatrices as:
3939 
3940 .vb
3941       A B C
3942       D E F
3943       G H I
3944 .ve
3945 
3946    Where the submatrices A,B,C are owned by proc0, D,E,F are
3947    owned by proc1, G,H,I are owned by proc2.
3948 
3949    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3950    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3951    The 'M','N' parameters are 8,8, and have the same values on all procs.
3952 
3953    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3954    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3955    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3956    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3957    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3958    matrix, ans [DF] as another SeqAIJ matrix.
3959 
3960    When d_nz, o_nz parameters are specified, d_nz storage elements are
3961    allocated for every row of the local diagonal submatrix, and o_nz
3962    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3963    One way to choose d_nz and o_nz is to use the max nonzerors per local
3964    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3965    In this case, the values of d_nz,o_nz are:
3966 .vb
3967      proc0 : dnz = 2, o_nz = 2
3968      proc1 : dnz = 3, o_nz = 2
3969      proc2 : dnz = 1, o_nz = 4
3970 .ve
3971    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3972    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3973    for proc3. i.e we are using 12+15+10=37 storage locations to store
3974    34 values.
3975 
3976    When d_nnz, o_nnz parameters are specified, the storage is specified
3977    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3978    In the above case the values for d_nnz,o_nnz are:
3979 .vb
3980      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3981      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3982      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3983 .ve
3984    Here the space allocated is sum of all the above values i.e 34, and
3985    hence pre-allocation is perfect.
3986 
3987    Level: intermediate
3988 
3989 .keywords: matrix, aij, compressed row, sparse, parallel
3990 
3991 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3992           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3993 @*/
3994 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3995 {
3996   PetscErrorCode ierr;
3997 
3998   PetscFunctionBegin;
3999   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4000   PetscValidType(B,1);
4001   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4002   PetscFunctionReturn(0);
4003 }
4004 
4005 #undef __FUNCT__
4006 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4007 /*@
4008      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4009          CSR format the local rows.
4010 
4011    Collective on MPI_Comm
4012 
4013    Input Parameters:
4014 +  comm - MPI communicator
4015 .  m - number of local rows (Cannot be PETSC_DECIDE)
4016 .  n - This value should be the same as the local size used in creating the
4017        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4018        calculated if N is given) For square matrices n is almost always m.
4019 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4020 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4021 .   i - row indices
4022 .   j - column indices
4023 -   a - matrix values
4024 
4025    Output Parameter:
4026 .   mat - the matrix
4027 
4028    Level: intermediate
4029 
4030    Notes:
4031        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4032      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4033      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4034 
4035        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4036 
4037        The format which is used for the sparse matrix input, is equivalent to a
4038     row-major ordering.. i.e for the following matrix, the input data expected is
4039     as shown:
4040 
4041         1 0 0
4042         2 0 3     P0
4043        -------
4044         4 5 6     P1
4045 
4046      Process0 [P0]: rows_owned=[0,1]
4047         i =  {0,1,3}  [size = nrow+1  = 2+1]
4048         j =  {0,0,2}  [size = nz = 6]
4049         v =  {1,2,3}  [size = nz = 6]
4050 
4051      Process1 [P1]: rows_owned=[2]
4052         i =  {0,3}    [size = nrow+1  = 1+1]
4053         j =  {0,1,2}  [size = nz = 6]
4054         v =  {4,5,6}  [size = nz = 6]
4055 
4056 .keywords: matrix, aij, compressed row, sparse, parallel
4057 
4058 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4059           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4060 @*/
4061 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4062 {
4063   PetscErrorCode ierr;
4064 
4065   PetscFunctionBegin;
4066   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4067   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4068   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4069   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4070   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4071   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4072   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4073   PetscFunctionReturn(0);
4074 }
4075 
4076 #undef __FUNCT__
4077 #define __FUNCT__ "MatCreateAIJ"
4078 /*@C
4079    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4080    (the default parallel PETSc format).  For good matrix assembly performance
4081    the user should preallocate the matrix storage by setting the parameters
4082    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4083    performance can be increased by more than a factor of 50.
4084 
4085    Collective on MPI_Comm
4086 
4087    Input Parameters:
4088 +  comm - MPI communicator
4089 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4090            This value should be the same as the local size used in creating the
4091            y vector for the matrix-vector product y = Ax.
4092 .  n - This value should be the same as the local size used in creating the
4093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4094        calculated if N is given) For square matrices n is almost always m.
4095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4097 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4098            (same value is used for all local rows)
4099 .  d_nnz - array containing the number of nonzeros in the various rows of the
4100            DIAGONAL portion of the local submatrix (possibly different for each row)
4101            or NULL, if d_nz is used to specify the nonzero structure.
4102            The size of this array is equal to the number of local rows, i.e 'm'.
4103 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4104            submatrix (same value is used for all local rows).
4105 -  o_nnz - array containing the number of nonzeros in the various rows of the
4106            OFF-DIAGONAL portion of the local submatrix (possibly different for
4107            each row) or NULL, if o_nz is used to specify the nonzero
4108            structure. The size of this array is equal to the number
4109            of local rows, i.e 'm'.
4110 
4111    Output Parameter:
4112 .  A - the matrix
4113 
4114    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4115    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4116    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4117 
4118    Notes:
4119    If the *_nnz parameter is given then the *_nz parameter is ignored
4120 
4121    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4122    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4123    storage requirements for this matrix.
4124 
4125    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4126    processor than it must be used on all processors that share the object for
4127    that argument.
4128 
4129    The user MUST specify either the local or global matrix dimensions
4130    (possibly both).
4131 
4132    The parallel matrix is partitioned across processors such that the
4133    first m0 rows belong to process 0, the next m1 rows belong to
4134    process 1, the next m2 rows belong to process 2 etc.. where
4135    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4136    values corresponding to [m x N] submatrix.
4137 
4138    The columns are logically partitioned with the n0 columns belonging
4139    to 0th partition, the next n1 columns belonging to the next
4140    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4141 
4142    The DIAGONAL portion of the local submatrix on any given processor
4143    is the submatrix corresponding to the rows and columns m,n
4144    corresponding to the given processor. i.e diagonal matrix on
4145    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4146    etc. The remaining portion of the local submatrix [m x (N-n)]
4147    constitute the OFF-DIAGONAL portion. The example below better
4148    illustrates this concept.
4149 
4150    For a square global matrix we define each processor's diagonal portion
4151    to be its local rows and the corresponding columns (a square submatrix);
4152    each processor's off-diagonal portion encompasses the remainder of the
4153    local matrix (a rectangular submatrix).
4154 
4155    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4156 
4157    When calling this routine with a single process communicator, a matrix of
4158    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4159    type of communicator, use the construction mechanism:
4160      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4161 
4162    By default, this format uses inodes (identical nodes) when possible.
4163    We search for consecutive rows with the same nonzero structure, thereby
4164    reusing matrix information to achieve increased efficiency.
4165 
4166    Options Database Keys:
4167 +  -mat_no_inode  - Do not use inodes
4168 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4169 -  -mat_aij_oneindex - Internally use indexing starting at 1
4170         rather than 0.  Note that when calling MatSetValues(),
4171         the user still MUST index entries starting at 0!
4172 
4173 
4174    Example usage:
4175 
4176    Consider the following 8x8 matrix with 34 non-zero values, that is
4177    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4178    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4179    as follows:
4180 
4181 .vb
4182             1  2  0  |  0  3  0  |  0  4
4183     Proc0   0  5  6  |  7  0  0  |  8  0
4184             9  0 10  | 11  0  0  | 12  0
4185     -------------------------------------
4186            13  0 14  | 15 16 17  |  0  0
4187     Proc1   0 18  0  | 19 20 21  |  0  0
4188             0  0  0  | 22 23  0  | 24  0
4189     -------------------------------------
4190     Proc2  25 26 27  |  0  0 28  | 29  0
4191            30  0  0  | 31 32 33  |  0 34
4192 .ve
4193 
4194    This can be represented as a collection of submatrices as:
4195 
4196 .vb
4197       A B C
4198       D E F
4199       G H I
4200 .ve
4201 
4202    Where the submatrices A,B,C are owned by proc0, D,E,F are
4203    owned by proc1, G,H,I are owned by proc2.
4204 
4205    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4206    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4207    The 'M','N' parameters are 8,8, and have the same values on all procs.
4208 
4209    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4210    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4211    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4212    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4213    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4214    matrix, ans [DF] as another SeqAIJ matrix.
4215 
4216    When d_nz, o_nz parameters are specified, d_nz storage elements are
4217    allocated for every row of the local diagonal submatrix, and o_nz
4218    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4219    One way to choose d_nz and o_nz is to use the max nonzerors per local
4220    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4221    In this case, the values of d_nz,o_nz are:
4222 .vb
4223      proc0 : dnz = 2, o_nz = 2
4224      proc1 : dnz = 3, o_nz = 2
4225      proc2 : dnz = 1, o_nz = 4
4226 .ve
4227    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4228    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4229    for proc3. i.e we are using 12+15+10=37 storage locations to store
4230    34 values.
4231 
4232    When d_nnz, o_nnz parameters are specified, the storage is specified
4233    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4234    In the above case the values for d_nnz,o_nnz are:
4235 .vb
4236      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4237      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4238      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4239 .ve
4240    Here the space allocated is sum of all the above values i.e 34, and
4241    hence pre-allocation is perfect.
4242 
4243    Level: intermediate
4244 
4245 .keywords: matrix, aij, compressed row, sparse, parallel
4246 
4247 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4248           MPIAIJ, MatCreateMPIAIJWithArrays()
4249 @*/
4250 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4251 {
4252   PetscErrorCode ierr;
4253   PetscMPIInt    size;
4254 
4255   PetscFunctionBegin;
4256   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4257   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4258   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4259   if (size > 1) {
4260     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4261     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4262   } else {
4263     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4264     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4265   }
4266   PetscFunctionReturn(0);
4267 }
4268 
4269 #undef __FUNCT__
4270 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4271 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4272 {
4273   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4274 
4275   PetscFunctionBegin;
4276   *Ad     = a->A;
4277   *Ao     = a->B;
4278   *colmap = a->garray;
4279   PetscFunctionReturn(0);
4280 }
4281 
4282 #undef __FUNCT__
4283 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4284 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4285 {
4286   PetscErrorCode ierr;
4287   PetscInt       i;
4288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4289 
4290   PetscFunctionBegin;
4291   if (coloring->ctype == IS_COLORING_GLOBAL) {
4292     ISColoringValue *allcolors,*colors;
4293     ISColoring      ocoloring;
4294 
4295     /* set coloring for diagonal portion */
4296     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4297 
4298     /* set coloring for off-diagonal portion */
4299     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4300     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4301     for (i=0; i<a->B->cmap->n; i++) {
4302       colors[i] = allcolors[a->garray[i]];
4303     }
4304     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4305     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4306     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4307     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4308   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4309     ISColoringValue *colors;
4310     PetscInt        *larray;
4311     ISColoring      ocoloring;
4312 
4313     /* set coloring for diagonal portion */
4314     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4315     for (i=0; i<a->A->cmap->n; i++) {
4316       larray[i] = i + A->cmap->rstart;
4317     }
4318     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4319     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4320     for (i=0; i<a->A->cmap->n; i++) {
4321       colors[i] = coloring->colors[larray[i]];
4322     }
4323     ierr = PetscFree(larray);CHKERRQ(ierr);
4324     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4325     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4326     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4327 
4328     /* set coloring for off-diagonal portion */
4329     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4330     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4331     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4332     for (i=0; i<a->B->cmap->n; i++) {
4333       colors[i] = coloring->colors[larray[i]];
4334     }
4335     ierr = PetscFree(larray);CHKERRQ(ierr);
4336     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4337     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4338     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4339   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4340   PetscFunctionReturn(0);
4341 }
4342 
4343 #undef __FUNCT__
4344 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4345 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4346 {
4347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4348   PetscErrorCode ierr;
4349 
4350   PetscFunctionBegin;
4351   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4352   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4353   PetscFunctionReturn(0);
4354 }
4355 
4356 #undef __FUNCT__
4357 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4358 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4359 {
4360   PetscErrorCode ierr;
4361   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4362   PetscInt       *indx;
4363 
4364   PetscFunctionBegin;
4365   /* This routine will ONLY return MPIAIJ type matrix */
4366   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4367   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4368   if (n == PETSC_DECIDE) {
4369     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4370   }
4371   /* Check sum(n) = N */
4372   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4373   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4374 
4375   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4376   rstart -= m;
4377 
4378   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4379   for (i=0; i<m; i++) {
4380     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4381     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4382     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4383   }
4384 
4385   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4386   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4387   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4388   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4389   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4390   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4391   PetscFunctionReturn(0);
4392 }
4393 
4394 #undef __FUNCT__
4395 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4396 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4397 {
4398   PetscErrorCode ierr;
4399   PetscInt       m,N,i,rstart,nnz,Ii;
4400   PetscInt       *indx;
4401   PetscScalar    *values;
4402 
4403   PetscFunctionBegin;
4404   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4405   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4406   for (i=0; i<m; i++) {
4407     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4408     Ii   = i + rstart;
4409     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4410     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4411   }
4412   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4419 /*@
4420       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4421                  matrices from each processor
4422 
4423     Collective on MPI_Comm
4424 
4425    Input Parameters:
4426 +    comm - the communicators the parallel matrix will live on
4427 .    inmat - the input sequential matrices
4428 .    n - number of local columns (or PETSC_DECIDE)
4429 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4430 
4431    Output Parameter:
4432 .    outmat - the parallel matrix generated
4433 
4434     Level: advanced
4435 
4436    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4437 
4438 @*/
4439 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4440 {
4441   PetscErrorCode ierr;
4442   PetscMPIInt    size;
4443 
4444   PetscFunctionBegin;
4445   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4446   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4447   if (size == 1) {
4448     if (scall == MAT_INITIAL_MATRIX) {
4449       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4450     } else {
4451       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4452     }
4453   } else {
4454     if (scall == MAT_INITIAL_MATRIX) {
4455       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4456     }
4457     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4458   }
4459   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4460   PetscFunctionReturn(0);
4461 }
4462 
4463 #undef __FUNCT__
4464 #define __FUNCT__ "MatFileSplit"
4465 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4466 {
4467   PetscErrorCode    ierr;
4468   PetscMPIInt       rank;
4469   PetscInt          m,N,i,rstart,nnz;
4470   size_t            len;
4471   const PetscInt    *indx;
4472   PetscViewer       out;
4473   char              *name;
4474   Mat               B;
4475   const PetscScalar *values;
4476 
4477   PetscFunctionBegin;
4478   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4479   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4480   /* Should this be the type of the diagonal block of A? */
4481   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4482   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4483   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4484   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4485   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4486   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4487   for (i=0; i<m; i++) {
4488     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4489     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4490     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4491   }
4492   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4493   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4494 
4495   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4496   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4497   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4498   sprintf(name,"%s.%d",outfile,rank);
4499   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4500   ierr = PetscFree(name);CHKERRQ(ierr);
4501   ierr = MatView(B,out);CHKERRQ(ierr);
4502   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4503   ierr = MatDestroy(&B);CHKERRQ(ierr);
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4508 #undef __FUNCT__
4509 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4510 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4511 {
4512   PetscErrorCode      ierr;
4513   Mat_Merge_SeqsToMPI *merge;
4514   PetscContainer      container;
4515 
4516   PetscFunctionBegin;
4517   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4518   if (container) {
4519     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4520     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4521     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4522     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4532     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4533     ierr = PetscFree(merge);CHKERRQ(ierr);
4534     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4535   }
4536   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4537   PetscFunctionReturn(0);
4538 }
4539 
4540 #include <../src/mat/utils/freespace.h>
4541 #include <petscbt.h>
4542 
4543 #undef __FUNCT__
4544 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4545 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4546 {
4547   PetscErrorCode      ierr;
4548   MPI_Comm            comm;
4549   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4550   PetscMPIInt         size,rank,taga,*len_s;
4551   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4552   PetscInt            proc,m;
4553   PetscInt            **buf_ri,**buf_rj;
4554   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4555   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4556   MPI_Request         *s_waits,*r_waits;
4557   MPI_Status          *status;
4558   MatScalar           *aa=a->a;
4559   MatScalar           **abuf_r,*ba_i;
4560   Mat_Merge_SeqsToMPI *merge;
4561   PetscContainer      container;
4562 
4563   PetscFunctionBegin;
4564   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4565   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4566 
4567   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4569 
4570   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4571   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4572 
4573   bi     = merge->bi;
4574   bj     = merge->bj;
4575   buf_ri = merge->buf_ri;
4576   buf_rj = merge->buf_rj;
4577 
4578   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4579   owners = merge->rowmap->range;
4580   len_s  = merge->len_s;
4581 
4582   /* send and recv matrix values */
4583   /*-----------------------------*/
4584   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4585   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4586 
4587   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4588   for (proc=0,k=0; proc<size; proc++) {
4589     if (!len_s[proc]) continue;
4590     i    = owners[proc];
4591     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4592     k++;
4593   }
4594 
4595   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4596   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4597   ierr = PetscFree(status);CHKERRQ(ierr);
4598 
4599   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4600   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4601 
4602   /* insert mat values of mpimat */
4603   /*----------------------------*/
4604   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4605   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4606 
4607   for (k=0; k<merge->nrecv; k++) {
4608     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4609     nrows       = *(buf_ri_k[k]);
4610     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4611     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4612   }
4613 
4614   /* set values of ba */
4615   m = merge->rowmap->n;
4616   for (i=0; i<m; i++) {
4617     arow = owners[rank] + i;
4618     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4619     bnzi = bi[i+1] - bi[i];
4620     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4621 
4622     /* add local non-zero vals of this proc's seqmat into ba */
4623     anzi   = ai[arow+1] - ai[arow];
4624     aj     = a->j + ai[arow];
4625     aa     = a->a + ai[arow];
4626     nextaj = 0;
4627     for (j=0; nextaj<anzi; j++) {
4628       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4629         ba_i[j] += aa[nextaj++];
4630       }
4631     }
4632 
4633     /* add received vals into ba */
4634     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4635       /* i-th row */
4636       if (i == *nextrow[k]) {
4637         anzi   = *(nextai[k]+1) - *nextai[k];
4638         aj     = buf_rj[k] + *(nextai[k]);
4639         aa     = abuf_r[k] + *(nextai[k]);
4640         nextaj = 0;
4641         for (j=0; nextaj<anzi; j++) {
4642           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4643             ba_i[j] += aa[nextaj++];
4644           }
4645         }
4646         nextrow[k]++; nextai[k]++;
4647       }
4648     }
4649     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4650   }
4651   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4652   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653 
4654   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4655   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4656   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4657   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4658   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4659   PetscFunctionReturn(0);
4660 }
4661 
4662 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4666 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4667 {
4668   PetscErrorCode      ierr;
4669   Mat                 B_mpi;
4670   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4671   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4672   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4673   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4674   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4675   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4676   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4677   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4678   MPI_Status          *status;
4679   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4680   PetscBT             lnkbt;
4681   Mat_Merge_SeqsToMPI *merge;
4682   PetscContainer      container;
4683 
4684   PetscFunctionBegin;
4685   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4686 
4687   /* make sure it is a PETSc comm */
4688   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4689   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4690   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4691 
4692   ierr = PetscNew(&merge);CHKERRQ(ierr);
4693   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4694 
4695   /* determine row ownership */
4696   /*---------------------------------------------------------*/
4697   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4698   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4699   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4700   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4701   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4702   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4703   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4704 
4705   m      = merge->rowmap->n;
4706   owners = merge->rowmap->range;
4707 
4708   /* determine the number of messages to send, their lengths */
4709   /*---------------------------------------------------------*/
4710   len_s = merge->len_s;
4711 
4712   len          = 0; /* length of buf_si[] */
4713   merge->nsend = 0;
4714   for (proc=0; proc<size; proc++) {
4715     len_si[proc] = 0;
4716     if (proc == rank) {
4717       len_s[proc] = 0;
4718     } else {
4719       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4720       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4721     }
4722     if (len_s[proc]) {
4723       merge->nsend++;
4724       nrows = 0;
4725       for (i=owners[proc]; i<owners[proc+1]; i++) {
4726         if (ai[i+1] > ai[i]) nrows++;
4727       }
4728       len_si[proc] = 2*(nrows+1);
4729       len         += len_si[proc];
4730     }
4731   }
4732 
4733   /* determine the number and length of messages to receive for ij-structure */
4734   /*-------------------------------------------------------------------------*/
4735   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4736   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4737 
4738   /* post the Irecv of j-structure */
4739   /*-------------------------------*/
4740   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4741   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4742 
4743   /* post the Isend of j-structure */
4744   /*--------------------------------*/
4745   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4746 
4747   for (proc=0, k=0; proc<size; proc++) {
4748     if (!len_s[proc]) continue;
4749     i    = owners[proc];
4750     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4751     k++;
4752   }
4753 
4754   /* receives and sends of j-structure are complete */
4755   /*------------------------------------------------*/
4756   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4757   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4758 
4759   /* send and recv i-structure */
4760   /*---------------------------*/
4761   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4762   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4763 
4764   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4765   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4766   for (proc=0,k=0; proc<size; proc++) {
4767     if (!len_s[proc]) continue;
4768     /* form outgoing message for i-structure:
4769          buf_si[0]:                 nrows to be sent
4770                [1:nrows]:           row index (global)
4771                [nrows+1:2*nrows+1]: i-structure index
4772     */
4773     /*-------------------------------------------*/
4774     nrows       = len_si[proc]/2 - 1;
4775     buf_si_i    = buf_si + nrows+1;
4776     buf_si[0]   = nrows;
4777     buf_si_i[0] = 0;
4778     nrows       = 0;
4779     for (i=owners[proc]; i<owners[proc+1]; i++) {
4780       anzi = ai[i+1] - ai[i];
4781       if (anzi) {
4782         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4783         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4784         nrows++;
4785       }
4786     }
4787     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4788     k++;
4789     buf_si += len_si[proc];
4790   }
4791 
4792   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4793   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4794 
4795   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4796   for (i=0; i<merge->nrecv; i++) {
4797     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4798   }
4799 
4800   ierr = PetscFree(len_si);CHKERRQ(ierr);
4801   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4802   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4803   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4804   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4805   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4806   ierr = PetscFree(status);CHKERRQ(ierr);
4807 
4808   /* compute a local seq matrix in each processor */
4809   /*----------------------------------------------*/
4810   /* allocate bi array and free space for accumulating nonzero column info */
4811   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4812   bi[0] = 0;
4813 
4814   /* create and initialize a linked list */
4815   nlnk = N+1;
4816   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4817 
4818   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4819   len  = ai[owners[rank+1]] - ai[owners[rank]];
4820   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4821 
4822   current_space = free_space;
4823 
4824   /* determine symbolic info for each local row */
4825   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4826 
4827   for (k=0; k<merge->nrecv; k++) {
4828     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4829     nrows       = *buf_ri_k[k];
4830     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4831     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4832   }
4833 
4834   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4835   len  = 0;
4836   for (i=0; i<m; i++) {
4837     bnzi = 0;
4838     /* add local non-zero cols of this proc's seqmat into lnk */
4839     arow  = owners[rank] + i;
4840     anzi  = ai[arow+1] - ai[arow];
4841     aj    = a->j + ai[arow];
4842     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4843     bnzi += nlnk;
4844     /* add received col data into lnk */
4845     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4846       if (i == *nextrow[k]) { /* i-th row */
4847         anzi  = *(nextai[k]+1) - *nextai[k];
4848         aj    = buf_rj[k] + *nextai[k];
4849         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4850         bnzi += nlnk;
4851         nextrow[k]++; nextai[k]++;
4852       }
4853     }
4854     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4855 
4856     /* if free space is not available, make more free space */
4857     if (current_space->local_remaining<bnzi) {
4858       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4859       nspacedouble++;
4860     }
4861     /* copy data into free space, then initialize lnk */
4862     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4863     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4864 
4865     current_space->array           += bnzi;
4866     current_space->local_used      += bnzi;
4867     current_space->local_remaining -= bnzi;
4868 
4869     bi[i+1] = bi[i] + bnzi;
4870   }
4871 
4872   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4873 
4874   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4875   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4876   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4877 
4878   /* create symbolic parallel matrix B_mpi */
4879   /*---------------------------------------*/
4880   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4881   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4882   if (n==PETSC_DECIDE) {
4883     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4884   } else {
4885     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4886   }
4887   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4888   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4889   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4890   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4891   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4892 
4893   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4894   B_mpi->assembled    = PETSC_FALSE;
4895   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4896   merge->bi           = bi;
4897   merge->bj           = bj;
4898   merge->buf_ri       = buf_ri;
4899   merge->buf_rj       = buf_rj;
4900   merge->coi          = NULL;
4901   merge->coj          = NULL;
4902   merge->owners_co    = NULL;
4903 
4904   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4905 
4906   /* attach the supporting struct to B_mpi for reuse */
4907   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4908   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4909   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4910   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4911   *mpimat = B_mpi;
4912 
4913   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4914   PetscFunctionReturn(0);
4915 }
4916 
4917 #undef __FUNCT__
4918 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4919 /*@C
4920       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4921                  matrices from each processor
4922 
4923     Collective on MPI_Comm
4924 
4925    Input Parameters:
4926 +    comm - the communicators the parallel matrix will live on
4927 .    seqmat - the input sequential matrices
4928 .    m - number of local rows (or PETSC_DECIDE)
4929 .    n - number of local columns (or PETSC_DECIDE)
4930 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 
4932    Output Parameter:
4933 .    mpimat - the parallel matrix generated
4934 
4935     Level: advanced
4936 
4937    Notes:
4938      The dimensions of the sequential matrix in each processor MUST be the same.
4939      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4940      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4941 @*/
4942 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4943 {
4944   PetscErrorCode ierr;
4945   PetscMPIInt    size;
4946 
4947   PetscFunctionBegin;
4948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4949   if (size == 1) {
4950     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4951     if (scall == MAT_INITIAL_MATRIX) {
4952       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4953     } else {
4954       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4955     }
4956     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4957     PetscFunctionReturn(0);
4958   }
4959   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4960   if (scall == MAT_INITIAL_MATRIX) {
4961     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4962   }
4963   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4964   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965   PetscFunctionReturn(0);
4966 }
4967 
4968 #undef __FUNCT__
4969 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4970 /*@
4971      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4972           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4973           with MatGetSize()
4974 
4975     Not Collective
4976 
4977    Input Parameters:
4978 +    A - the matrix
4979 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4980 
4981    Output Parameter:
4982 .    A_loc - the local sequential matrix generated
4983 
4984     Level: developer
4985 
4986 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4987 
4988 @*/
4989 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4990 {
4991   PetscErrorCode ierr;
4992   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4993   Mat_SeqAIJ     *mat,*a,*b;
4994   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4995   MatScalar      *aa,*ba,*cam;
4996   PetscScalar    *ca;
4997   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4998   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4999   PetscBool      match;
5000 
5001   PetscFunctionBegin;
5002   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5003   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5004   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5005   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5006   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5007   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5008   aa = a->a; ba = b->a;
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
5011     ci[0] = 0;
5012     for (i=0; i<am; i++) {
5013       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5014     }
5015     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
5016     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
5017     k    = 0;
5018     for (i=0; i<am; i++) {
5019       ncols_o = bi[i+1] - bi[i];
5020       ncols_d = ai[i+1] - ai[i];
5021       /* off-diagonal portion of A */
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         cj[k]   = col; bj++;
5026         ca[k++] = *ba++;
5027       }
5028       /* diagonal portion of A */
5029       for (j=0; j<ncols_d; j++) {
5030         cj[k]   = cstart + *aj++;
5031         ca[k++] = *aa++;
5032       }
5033       /* off-diagonal portion of A */
5034       for (j=jo; j<ncols_o; j++) {
5035         cj[k]   = cmap[*bj++];
5036         ca[k++] = *ba++;
5037       }
5038     }
5039     /* put together the new matrix */
5040     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5041     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5042     /* Since these are PETSc arrays, change flags to free them as necessary. */
5043     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5044     mat->free_a  = PETSC_TRUE;
5045     mat->free_ij = PETSC_TRUE;
5046     mat->nonew   = 0;
5047   } else if (scall == MAT_REUSE_MATRIX) {
5048     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5049     ci = mat->i; cj = mat->j; cam = mat->a;
5050     for (i=0; i<am; i++) {
5051       /* off-diagonal portion of A */
5052       ncols_o = bi[i+1] - bi[i];
5053       for (jo=0; jo<ncols_o; jo++) {
5054         col = cmap[*bj];
5055         if (col >= cstart) break;
5056         *cam++ = *ba++; bj++;
5057       }
5058       /* diagonal portion of A */
5059       ncols_d = ai[i+1] - ai[i];
5060       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5061       /* off-diagonal portion of A */
5062       for (j=jo; j<ncols_o; j++) {
5063         *cam++ = *ba++; bj++;
5064       }
5065     }
5066   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5067   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5068   PetscFunctionReturn(0);
5069 }
5070 
5071 #undef __FUNCT__
5072 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5073 /*@C
5074      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5075 
5076     Not Collective
5077 
5078    Input Parameters:
5079 +    A - the matrix
5080 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5081 -    row, col - index sets of rows and columns to extract (or NULL)
5082 
5083    Output Parameter:
5084 .    A_loc - the local sequential matrix generated
5085 
5086     Level: developer
5087 
5088 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5089 
5090 @*/
5091 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5092 {
5093   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5094   PetscErrorCode ierr;
5095   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5096   IS             isrowa,iscola;
5097   Mat            *aloc;
5098   PetscBool      match;
5099 
5100   PetscFunctionBegin;
5101   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5102   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5103   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5104   if (!row) {
5105     start = A->rmap->rstart; end = A->rmap->rend;
5106     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5107   } else {
5108     isrowa = *row;
5109   }
5110   if (!col) {
5111     start = A->cmap->rstart;
5112     cmap  = a->garray;
5113     nzA   = a->A->cmap->n;
5114     nzB   = a->B->cmap->n;
5115     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5116     ncols = 0;
5117     for (i=0; i<nzB; i++) {
5118       if (cmap[i] < start) idx[ncols++] = cmap[i];
5119       else break;
5120     }
5121     imark = i;
5122     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5123     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5124     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5125   } else {
5126     iscola = *col;
5127   }
5128   if (scall != MAT_INITIAL_MATRIX) {
5129     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5130     aloc[0] = *A_loc;
5131   }
5132   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5133   *A_loc = aloc[0];
5134   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5135   if (!row) {
5136     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5137   }
5138   if (!col) {
5139     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5140   }
5141   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5142   PetscFunctionReturn(0);
5143 }
5144 
5145 #undef __FUNCT__
5146 #define __FUNCT__ "MatGetBrowsOfAcols"
5147 /*@C
5148     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5149 
5150     Collective on Mat
5151 
5152    Input Parameters:
5153 +    A,B - the matrices in mpiaij format
5154 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5155 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5156 
5157    Output Parameter:
5158 +    rowb, colb - index sets of rows and columns of B to extract
5159 -    B_seq - the sequential matrix generated
5160 
5161     Level: developer
5162 
5163 @*/
5164 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5165 {
5166   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5167   PetscErrorCode ierr;
5168   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5169   IS             isrowb,iscolb;
5170   Mat            *bseq=NULL;
5171 
5172   PetscFunctionBegin;
5173   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5174     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5175   }
5176   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5177 
5178   if (scall == MAT_INITIAL_MATRIX) {
5179     start = A->cmap->rstart;
5180     cmap  = a->garray;
5181     nzA   = a->A->cmap->n;
5182     nzB   = a->B->cmap->n;
5183     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5184     ncols = 0;
5185     for (i=0; i<nzB; i++) {  /* row < local row index */
5186       if (cmap[i] < start) idx[ncols++] = cmap[i];
5187       else break;
5188     }
5189     imark = i;
5190     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5191     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5192     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5193     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5194   } else {
5195     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5196     isrowb  = *rowb; iscolb = *colb;
5197     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5198     bseq[0] = *B_seq;
5199   }
5200   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5201   *B_seq = bseq[0];
5202   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5203   if (!rowb) {
5204     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5205   } else {
5206     *rowb = isrowb;
5207   }
5208   if (!colb) {
5209     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5210   } else {
5211     *colb = iscolb;
5212   }
5213   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5214   PetscFunctionReturn(0);
5215 }
5216 
5217 #undef __FUNCT__
5218 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5219 /*
5220     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5221     of the OFF-DIAGONAL portion of local A
5222 
5223     Collective on Mat
5224 
5225    Input Parameters:
5226 +    A,B - the matrices in mpiaij format
5227 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5228 
5229    Output Parameter:
5230 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5231 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5232 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5233 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5234 
5235     Level: developer
5236 
5237 */
5238 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5239 {
5240   VecScatter_MPI_General *gen_to,*gen_from;
5241   PetscErrorCode         ierr;
5242   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5243   Mat_SeqAIJ             *b_oth;
5244   VecScatter             ctx =a->Mvctx;
5245   MPI_Comm               comm;
5246   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5247   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5248   PetscScalar            *rvalues,*svalues;
5249   MatScalar              *b_otha,*bufa,*bufA;
5250   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5251   MPI_Request            *rwaits = NULL,*swaits = NULL;
5252   MPI_Status             *sstatus,rstatus;
5253   PetscMPIInt            jj;
5254   PetscInt               *cols,sbs,rbs;
5255   PetscScalar            *vals;
5256 
5257   PetscFunctionBegin;
5258   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5259   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5260     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5261   }
5262   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5263   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5264 
5265   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5266   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5267   rvalues  = gen_from->values; /* holds the length of receiving row */
5268   svalues  = gen_to->values;   /* holds the length of sending row */
5269   nrecvs   = gen_from->n;
5270   nsends   = gen_to->n;
5271 
5272   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5273   srow    = gen_to->indices;    /* local row index to be sent */
5274   sstarts = gen_to->starts;
5275   sprocs  = gen_to->procs;
5276   sstatus = gen_to->sstatus;
5277   sbs     = gen_to->bs;
5278   rstarts = gen_from->starts;
5279   rprocs  = gen_from->procs;
5280   rbs     = gen_from->bs;
5281 
5282   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5283   if (scall == MAT_INITIAL_MATRIX) {
5284     /* i-array */
5285     /*---------*/
5286     /*  post receives */
5287     for (i=0; i<nrecvs; i++) {
5288       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5289       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5290       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5291     }
5292 
5293     /* pack the outgoing message */
5294     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5295 
5296     sstartsj[0] = 0;
5297     rstartsj[0] = 0;
5298     len         = 0; /* total length of j or a array to be sent */
5299     k           = 0;
5300     for (i=0; i<nsends; i++) {
5301       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5302       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5303       for (j=0; j<nrows; j++) {
5304         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5305         for (l=0; l<sbs; l++) {
5306           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5307 
5308           rowlen[j*sbs+l] = ncols;
5309 
5310           len += ncols;
5311           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5312         }
5313         k++;
5314       }
5315       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5316 
5317       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5318     }
5319     /* recvs and sends of i-array are completed */
5320     i = nrecvs;
5321     while (i--) {
5322       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5323     }
5324     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5325 
5326     /* allocate buffers for sending j and a arrays */
5327     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5328     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5329 
5330     /* create i-array of B_oth */
5331     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5332 
5333     b_othi[0] = 0;
5334     len       = 0; /* total length of j or a array to be received */
5335     k         = 0;
5336     for (i=0; i<nrecvs; i++) {
5337       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5338       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5339       for (j=0; j<nrows; j++) {
5340         b_othi[k+1] = b_othi[k] + rowlen[j];
5341         len        += rowlen[j]; k++;
5342       }
5343       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5344     }
5345 
5346     /* allocate space for j and a arrrays of B_oth */
5347     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5348     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5349 
5350     /* j-array */
5351     /*---------*/
5352     /*  post receives of j-array */
5353     for (i=0; i<nrecvs; i++) {
5354       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356     }
5357 
5358     /* pack the outgoing message j-array */
5359     k = 0;
5360     for (i=0; i<nsends; i++) {
5361       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362       bufJ  = bufj+sstartsj[i];
5363       for (j=0; j<nrows; j++) {
5364         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5365         for (ll=0; ll<sbs; ll++) {
5366           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5367           for (l=0; l<ncols; l++) {
5368             *bufJ++ = cols[l];
5369           }
5370           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5371         }
5372       }
5373       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5374     }
5375 
5376     /* recvs and sends of j-array are completed */
5377     i = nrecvs;
5378     while (i--) {
5379       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380     }
5381     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5382   } else if (scall == MAT_REUSE_MATRIX) {
5383     sstartsj = *startsj_s;
5384     rstartsj = *startsj_r;
5385     bufa     = *bufa_ptr;
5386     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5387     b_otha   = b_oth->a;
5388   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5389 
5390   /* a-array */
5391   /*---------*/
5392   /*  post receives of a-array */
5393   for (i=0; i<nrecvs; i++) {
5394     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5395     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5396   }
5397 
5398   /* pack the outgoing message a-array */
5399   k = 0;
5400   for (i=0; i<nsends; i++) {
5401     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5402     bufA  = bufa+sstartsj[i];
5403     for (j=0; j<nrows; j++) {
5404       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5405       for (ll=0; ll<sbs; ll++) {
5406         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5407         for (l=0; l<ncols; l++) {
5408           *bufA++ = vals[l];
5409         }
5410         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5411       }
5412     }
5413     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5414   }
5415   /* recvs and sends of a-array are completed */
5416   i = nrecvs;
5417   while (i--) {
5418     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5419   }
5420   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5421   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5422 
5423   if (scall == MAT_INITIAL_MATRIX) {
5424     /* put together the new matrix */
5425     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5426 
5427     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5428     /* Since these are PETSc arrays, change flags to free them as necessary. */
5429     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5430     b_oth->free_a  = PETSC_TRUE;
5431     b_oth->free_ij = PETSC_TRUE;
5432     b_oth->nonew   = 0;
5433 
5434     ierr = PetscFree(bufj);CHKERRQ(ierr);
5435     if (!startsj_s || !bufa_ptr) {
5436       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5437       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5438     } else {
5439       *startsj_s = sstartsj;
5440       *startsj_r = rstartsj;
5441       *bufa_ptr  = bufa;
5442     }
5443   }
5444   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5445   PetscFunctionReturn(0);
5446 }
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatGetCommunicationStructs"
5450 /*@C
5451   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5452 
5453   Not Collective
5454 
5455   Input Parameters:
5456 . A - The matrix in mpiaij format
5457 
5458   Output Parameter:
5459 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5460 . colmap - A map from global column index to local index into lvec
5461 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5462 
5463   Level: developer
5464 
5465 @*/
5466 #if defined(PETSC_USE_CTABLE)
5467 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5468 #else
5469 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5470 #endif
5471 {
5472   Mat_MPIAIJ *a;
5473 
5474   PetscFunctionBegin;
5475   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5476   PetscValidPointer(lvec, 2);
5477   PetscValidPointer(colmap, 3);
5478   PetscValidPointer(multScatter, 4);
5479   a = (Mat_MPIAIJ*) A->data;
5480   if (lvec) *lvec = a->lvec;
5481   if (colmap) *colmap = a->colmap;
5482   if (multScatter) *multScatter = a->Mvctx;
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5488 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5492 /*
5493     Computes (B'*A')' since computing B*A directly is untenable
5494 
5495                n                       p                          p
5496         (              )       (              )         (                  )
5497       m (      A       )  *  n (       B      )   =   m (         C        )
5498         (              )       (              )         (                  )
5499 
5500 */
5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5502 {
5503   PetscErrorCode ierr;
5504   Mat            At,Bt,Ct;
5505 
5506   PetscFunctionBegin;
5507   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5508   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5509   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5510   ierr = MatDestroy(&At);CHKERRQ(ierr);
5511   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5512   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5513   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522   PetscInt       m=A->rmap->n,n=B->cmap->n;
5523   Mat            Cmat;
5524 
5525   PetscFunctionBegin;
5526   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5527   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5528   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5529   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5530   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5531   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5532   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5533   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5534 
5535   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5536 
5537   *C = Cmat;
5538   PetscFunctionReturn(0);
5539 }
5540 
5541 /* ----------------------------------------------------------------*/
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5544 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5545 {
5546   PetscErrorCode ierr;
5547 
5548   PetscFunctionBegin;
5549   if (scall == MAT_INITIAL_MATRIX) {
5550     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5551     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5552     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5553   }
5554   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5555   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5556   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5557   PetscFunctionReturn(0);
5558 }
5559 
5560 #if defined(PETSC_HAVE_MUMPS)
5561 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5562 #endif
5563 #if defined(PETSC_HAVE_PASTIX)
5564 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5565 #endif
5566 #if defined(PETSC_HAVE_SUPERLU_DIST)
5567 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5568 #endif
5569 #if defined(PETSC_HAVE_CLIQUE)
5570 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5571 #endif
5572 
5573 /*MC
5574    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5575 
5576    Options Database Keys:
5577 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5578 
5579   Level: beginner
5580 
5581 .seealso: MatCreateAIJ()
5582 M*/
5583 
5584 #undef __FUNCT__
5585 #define __FUNCT__ "MatCreate_MPIAIJ"
5586 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5587 {
5588   Mat_MPIAIJ     *b;
5589   PetscErrorCode ierr;
5590   PetscMPIInt    size;
5591 
5592   PetscFunctionBegin;
5593   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5594 
5595   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5596   B->data       = (void*)b;
5597   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5598   B->assembled  = PETSC_FALSE;
5599   B->insertmode = NOT_SET_VALUES;
5600   b->size       = size;
5601 
5602   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5603 
5604   /* build cache for off array entries formed */
5605   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5606 
5607   b->donotstash  = PETSC_FALSE;
5608   b->colmap      = 0;
5609   b->garray      = 0;
5610   b->roworiented = PETSC_TRUE;
5611 
5612   /* stuff used for matrix vector multiply */
5613   b->lvec  = NULL;
5614   b->Mvctx = NULL;
5615 
5616   /* stuff for MatGetRow() */
5617   b->rowindices   = 0;
5618   b->rowvalues    = 0;
5619   b->getrowactive = PETSC_FALSE;
5620 
5621   /* flexible pointer used in CUSP/CUSPARSE classes */
5622   b->spptr = NULL;
5623 
5624 #if defined(PETSC_HAVE_MUMPS)
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5626 #endif
5627 #if defined(PETSC_HAVE_PASTIX)
5628   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5629 #endif
5630 #if defined(PETSC_HAVE_SUPERLU_DIST)
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5632 #endif
5633 #if defined(PETSC_HAVE_CLIQUE)
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5635 #endif
5636   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5650   PetscFunctionReturn(0);
5651 }
5652 
5653 #undef __FUNCT__
5654 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5655 /*@
5656      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5657          and "off-diagonal" part of the matrix in CSR format.
5658 
5659    Collective on MPI_Comm
5660 
5661    Input Parameters:
5662 +  comm - MPI communicator
5663 .  m - number of local rows (Cannot be PETSC_DECIDE)
5664 .  n - This value should be the same as the local size used in creating the
5665        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5666        calculated if N is given) For square matrices n is almost always m.
5667 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5668 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5669 .   i - row indices for "diagonal" portion of matrix
5670 .   j - column indices
5671 .   a - matrix values
5672 .   oi - row indices for "off-diagonal" portion of matrix
5673 .   oj - column indices
5674 -   oa - matrix values
5675 
5676    Output Parameter:
5677 .   mat - the matrix
5678 
5679    Level: advanced
5680 
5681    Notes:
5682        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5683        must free the arrays once the matrix has been destroyed and not before.
5684 
5685        The i and j indices are 0 based
5686 
5687        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5688 
5689        This sets local rows and cannot be used to set off-processor values.
5690 
5691        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5692        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5693        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5694        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5695        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5696        communication if it is known that only local entries will be set.
5697 
5698 .keywords: matrix, aij, compressed row, sparse, parallel
5699 
5700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5701           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5702 @*/
5703 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5704 {
5705   PetscErrorCode ierr;
5706   Mat_MPIAIJ     *maij;
5707 
5708   PetscFunctionBegin;
5709   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5710   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5711   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5712   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5713   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5714   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5715   maij = (Mat_MPIAIJ*) (*mat)->data;
5716 
5717   (*mat)->preallocated = PETSC_TRUE;
5718 
5719   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5720   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5721 
5722   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5723   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5724 
5725   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5727   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5728   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5729 
5730   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5731   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5732   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5733   PetscFunctionReturn(0);
5734 }
5735 
5736 /*
5737     Special version for direct calls from Fortran
5738 */
5739 #include <petsc-private/fortranimpl.h>
5740 
5741 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5742 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5743 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5744 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5745 #endif
5746 
5747 /* Change these macros so can be used in void function */
5748 #undef CHKERRQ
5749 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5750 #undef SETERRQ2
5751 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5752 #undef SETERRQ3
5753 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5754 #undef SETERRQ
5755 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5756 
5757 #undef __FUNCT__
5758 #define __FUNCT__ "matsetvaluesmpiaij_"
5759 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5760 {
5761   Mat            mat  = *mmat;
5762   PetscInt       m    = *mm, n = *mn;
5763   InsertMode     addv = *maddv;
5764   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5765   PetscScalar    value;
5766   PetscErrorCode ierr;
5767 
5768   MatCheckPreallocated(mat,1);
5769   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5770 
5771 #if defined(PETSC_USE_DEBUG)
5772   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5773 #endif
5774   {
5775     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5776     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5777     PetscBool roworiented = aij->roworiented;
5778 
5779     /* Some Variables required in the macro */
5780     Mat        A                 = aij->A;
5781     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5782     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5783     MatScalar  *aa               = a->a;
5784     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5785     Mat        B                 = aij->B;
5786     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5787     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5788     MatScalar  *ba               = b->a;
5789 
5790     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5791     PetscInt  nonew = a->nonew;
5792     MatScalar *ap1,*ap2;
5793 
5794     PetscFunctionBegin;
5795     for (i=0; i<m; i++) {
5796       if (im[i] < 0) continue;
5797 #if defined(PETSC_USE_DEBUG)
5798       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5799 #endif
5800       if (im[i] >= rstart && im[i] < rend) {
5801         row      = im[i] - rstart;
5802         lastcol1 = -1;
5803         rp1      = aj + ai[row];
5804         ap1      = aa + ai[row];
5805         rmax1    = aimax[row];
5806         nrow1    = ailen[row];
5807         low1     = 0;
5808         high1    = nrow1;
5809         lastcol2 = -1;
5810         rp2      = bj + bi[row];
5811         ap2      = ba + bi[row];
5812         rmax2    = bimax[row];
5813         nrow2    = bilen[row];
5814         low2     = 0;
5815         high2    = nrow2;
5816 
5817         for (j=0; j<n; j++) {
5818           if (roworiented) value = v[i*n+j];
5819           else value = v[i+j*m];
5820           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5821           if (in[j] >= cstart && in[j] < cend) {
5822             col = in[j] - cstart;
5823             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5824           } else if (in[j] < 0) continue;
5825 #if defined(PETSC_USE_DEBUG)
5826           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5827 #endif
5828           else {
5829             if (mat->was_assembled) {
5830               if (!aij->colmap) {
5831                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5832               }
5833 #if defined(PETSC_USE_CTABLE)
5834               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5835               col--;
5836 #else
5837               col = aij->colmap[in[j]] - 1;
5838 #endif
5839               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5840                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5841                 col  =  in[j];
5842                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5843                 B     = aij->B;
5844                 b     = (Mat_SeqAIJ*)B->data;
5845                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5846                 rp2   = bj + bi[row];
5847                 ap2   = ba + bi[row];
5848                 rmax2 = bimax[row];
5849                 nrow2 = bilen[row];
5850                 low2  = 0;
5851                 high2 = nrow2;
5852                 bm    = aij->B->rmap->n;
5853                 ba    = b->a;
5854               }
5855             } else col = in[j];
5856             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5857           }
5858         }
5859       } else if (!aij->donotstash) {
5860         if (roworiented) {
5861           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5862         } else {
5863           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5864         }
5865       }
5866     }
5867   }
5868   PetscFunctionReturnVoid();
5869 }
5870 
5871