xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2c7591d027da6313c7cd39b8aa0f08f93071e2d7)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
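
   For example, code intended to run unchanged on both one and many processes can simply call both
   routines after setting the type; whichever call does not apply to the actual type is ignored.
   A minimal sketch, where the preallocation arguments are illustrative placeholders only:
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve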
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; this matrix type also automatically switches over to using
20    inodes when enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
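  /* first pass: count the locally owned rows whose diagonal (A) and off-diagonal (B) parts contain no nonzero values */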
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
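
    A sketch of the intended call pattern (variable names here are illustrative only): every rank
    passes its desired number of local rows, the entries are taken from the SeqAIJ matrix on
    rank 0, and a later call with MAT_REUSE_MATRIX moves only new numerical values into the
    already-created parallel matrix:

       ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ...
       ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);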
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the number of diagonal and off-diagonal nonzeros in each row */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the number of diagonal and off-diagonal nonzeros in each row */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each process
367 holds an order-N integer array) but is fast to access.
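
In both cases the local index is stored as i+1, so a lookup result of zero means the column is
not present in the off-diagonal block; users of the map subtract one and treat a negative result
as not found.  A sketch mirroring the lookups in MatSetValues_MPIAIJ() below, with gcol and lcol
as illustrative names:

  #if defined(PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif
    ... lcol < 0 means global column gcol is not (yet) a column of the off-diagonal block ...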
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
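      /* off-process row: stash the values; they are communicated to the owning process and inserted during MatAssemblyBegin()/MatAssemblyEnd() */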
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled; if so, we must
707      also disassemble ourselves so that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
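  /* overlap communication with computation: start scattering the ghost values of xx needed by the
     off-diagonal block B, apply the diagonal block A to the local part of xx, then complete the
     scatter and add in the contribution B*lvec */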
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_Redundant"
1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Redundant  *redund = *redundant;
1125   PetscInt       i;
1126 
1127   PetscFunctionBegin;
1128   *redundant = NULL;
1129   if (redund){
1130     if (redund->matseq) { /* via MatGetSubMatrices()  */
1131       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1132       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1133       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1135     } else {
1136       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1137       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1138       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1139       for (i=0; i<redund->nrecvs; i++) {
1140         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1141         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1142       }
1143       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1144     }
1145 
1146     if (redund->subcomm) {
1147       ierr = PetscCommDestroy(&redund->subcomm);CHKERRQ(ierr);
1148     }
1149     ierr = PetscFree(redund);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined(PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1191 #if defined(PETSC_HAVE_ELEMENTAL)
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1193 #endif
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1204   PetscErrorCode ierr;
1205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1206   int            fd;
1207   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1208   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1209   PetscScalar    *column_values;
1210   PetscInt       message_count,flowcontrolcount;
1211   FILE           *file;
1212 
1213   PetscFunctionBegin;
1214   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1216   nz   = A->nz + B->nz;
1217   if (!rank) {
1218     header[0] = MAT_FILE_CLASSID;
1219     header[1] = mat->rmap->N;
1220     header[2] = mat->cmap->N;
1221 
1222     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1224     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     /* get largest number of rows any processor has */
1226     rlen  = mat->rmap->n;
1227     range = mat->rmap->range;
1228     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1229   } else {
1230     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     rlen = mat->rmap->n;
1232   }
1233 
1234   /* load up the local row counts */
1235   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1236   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1251     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* process 0 needs as much space as the largest amount needed by any process */
1258   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1260   cnt   = 0;
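  /* for each local row, emit in increasing global-column order: the off-diagonal (B) columns that
     precede the diagonal block, then the diagonal block (A) columns shifted by cstart, then the
     remaining off-diagonal columns */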
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ((col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1267     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column indices to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1280       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1291 
1292   /* load up the local column values */
1293   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1294   cnt  = 0;
1295   for (i=0; i<mat->rmap->n; i++) {
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if (garray[B->j[j]] > cstart) break;
1298       column_values[cnt++] = B->a[j];
1299     }
1300     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1301     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1302   }
1303   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1304 
1305   /* store the column values to the file */
1306   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1307   if (!rank) {
1308     MPI_Status status;
1309     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     for (i=1; i<size; i++) {
1311       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1312       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1313       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1314       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(column_values);CHKERRQ(ierr);
1325 
1326   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1327   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1328   PetscFunctionReturn(0);
1329 }
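
/*
   Summary of the binary layout produced by the routine above (everything is written by
   process 0 with PetscBinaryWrite(), in this order):

     header[0]        MAT_FILE_CLASSID
     header[1]        global number of rows
     header[2]        global number of columns
     header[3]        global number of nonzeros (MPI_Reduce sum of the local nz)
     row lengths      one PETSC_INT per global row, process by process
     column indices   nz PETSC_INTs, global indices; for each row the diagonal-block and
                      off-diagonal-block entries are merged in increasing column order
     values           nz PETSC_SCALARs in the same order as the column indices
*/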
1330 
1331 #include <petscdraw.h>
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1334 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1335 {
1336   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1337   PetscErrorCode    ierr;
1338   PetscMPIInt       rank = aij->rank,size = aij->size;
1339   PetscBool         isdraw,iascii,isbinary;
1340   PetscViewer       sviewer;
1341   PetscViewerFormat format;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1346   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1347   if (iascii) {
1348     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1349     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1350       MatInfo   info;
1351       PetscInt  *inodes;
1352 
1353       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1354       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1357       if (!inodes) {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1360       } else {
1361         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1362                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1363       }
1364       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1366       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1368       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1371       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1374       PetscInt inodecount,inodelimit,*inodes;
1375       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1376       if (inodes) {
1377         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1378       } else {
1379         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1380       }
1381       PetscFunctionReturn(0);
1382     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1383       PetscFunctionReturn(0);
1384     }
1385   } else if (isbinary) {
1386     if (size == 1) {
1387       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1388       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1389     } else {
1390       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1391     }
1392     PetscFunctionReturn(0);
1393   } else if (isdraw) {
1394     PetscDraw draw;
1395     PetscBool isnull;
1396     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1397     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1398   }
1399 
1400   {
1401     /* assemble the entire matrix onto first processor. */
1402     Mat        A;
1403     Mat_SeqAIJ *Aloc;
1404     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1405     MatScalar  *a;
1406 
1407     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1408     if (!rank) {
1409       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1410     } else {
1411       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1412     }
1413     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1414     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1415     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1416     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1417     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1418 
1419     /* copy over the A part */
1420     Aloc = (Mat_SeqAIJ*)aij->A->data;
1421     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1422     row  = mat->rmap->rstart;
1423     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1424     for (i=0; i<m; i++) {
1425       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1426       row++;
1427       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1428     }
1429     aj = Aloc->j;
1430     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1431 
1432     /* copy over the B part */
1433     Aloc = (Mat_SeqAIJ*)aij->B->data;
1434     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1435     row  = mat->rmap->rstart;
1436     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1437     ct   = cols;
1438     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++;
1442       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443     }
1444     ierr = PetscFree(ct);CHKERRQ(ierr);
1445     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     /*
1448        Every process has to participate in drawing the matrix since the graphics waits are
1449        synchronized across all processes that share the PetscDraw object
1450     */
1451     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     if (!rank) {
1453       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1454     }
1455     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1456     ierr = MatDestroy(&A);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatView_MPIAIJ"
1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1464 {
1465   PetscErrorCode ierr;
1466   PetscBool      iascii,isdraw,issocket,isbinary;
1467 
1468   PetscFunctionBegin;
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1473   if (iascii || isdraw || isbinary || issocket) {
1474     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSOR_MPIAIJ"
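/*
   Sketch of what the local SOR variants below implement: with A the local diagonal block,
   B the local off-diagonal block and x_ghost = mat->lvec the scattered off-process values,

       bb1 = bb - B*x_ghost
       x   = SOR(A, bb1, omega, lits local sweeps)

   repeated its times, i.e. a processor-block (Jacobi-like) outer iteration with SOR applied
   to the local block.  The SOR_EISENSTAT branch instead applies Eisenstat's trick, using the
   matrix diagonal (mat->diag or MatMultDiagonalBlock()) to form the modified right-hand side.
*/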
1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1482 {
1483   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1484   PetscErrorCode ierr;
1485   Vec            bb1 = 0;
1486   PetscBool      hasop;
1487 
1488   PetscFunctionBegin;
1489   if (flag == SOR_APPLY_UPPER) {
1490     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491     PetscFunctionReturn(0);
1492   }
1493 
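  /* bb1 holds the modified right-hand side; it is needed whenever the current x contributes
     to the sweep.  Note that (~flag & SOR_ZERO_INITIAL_GUESS) is nonzero exactly when the
     SOR_ZERO_INITIAL_GUESS bit is NOT set in flag. */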
1494   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1495     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1496   }
1497 
1498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503 
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1532     if (flag & SOR_ZERO_INITIAL_GUESS) {
1533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534       its--;
1535     }
1536     while (its--) {
1537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539 
1540       /* update rhs: bb1 = bb - B*x */
1541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1543 
1544       /* local sweep */
1545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1546     }
1547   } else if (flag & SOR_EISENSTAT) {
1548     Vec xx1;
1549 
1550     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1552 
1553     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555     if (!mat->diag) {
1556       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1557       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1558     }
1559     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1560     if (hasop) {
1561       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1562     } else {
1563       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1564     }
1565     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1566 
1567     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1568 
1569     /* local sweep */
1570     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1571     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1572     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1573   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1574 
1575   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatPermute_MPIAIJ"
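/*
   Sketch of the approach below: the row and column permutations are inverted with PetscSF
   reductions (rdest[i]/cdest[i] = new global index of local row/column i), the destination of
   each ghost column is obtained with a PetscSF broadcast (gcdest), preallocation counts for
   the permuted matrix are assembled from those destinations, and the values are then inserted
   with MatSetValues().

   A minimal usage sketch (rowp and colp are parallel index sets holding the desired orderings;
   the names are illustrative only):

       Mat B;
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/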
1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1582 {
1583   Mat            aA,aB,Aperm;
1584   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1585   PetscScalar    *aa,*ba;
1586   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1587   PetscSF        rowsf,sf;
1588   IS             parcolp = NULL;
1589   PetscBool      done;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1594   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1595   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1596   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1597 
1598   /* Invert row permutation to find out where my rows should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1602   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1603   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1605 
1606   /* Invert column permutation to find out where my columns should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1610   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1611   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1616   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1617   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1618 
1619   /* Find out where my gcols should go */
1620   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1621   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1623   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1624   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1625   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1627   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1628 
1629   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1631   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) {
1633     PetscInt row = rdest[i],rowner;
1634     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1635     for (j=ai[i]; j<ai[i+1]; j++) {
1636       PetscInt cowner,col = cdest[aj[j]];
1637       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1638       if (rowner == cowner) dnnz[i]++;
1639       else onnz[i]++;
1640     }
1641     for (j=bi[i]; j<bi[i+1]; j++) {
1642       PetscInt cowner,col = gcdest[bj[j]];
1643       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1644       if (rowner == cowner) dnnz[i]++;
1645       else onnz[i]++;
1646     }
1647   }
1648   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1652   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1653 
1654   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1659     PetscInt j0,rowlen;
1660     rowlen = ai[i+1] - ai[i];
1661     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1662       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1663       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     rowlen = bi[i+1] - bi[i];
1666     for (j0=j=0; j<rowlen; j0=j) {
1667       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1668       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1669     }
1670   }
1671   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1674   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1676   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1677   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1678   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1679   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1680   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1681   *B = Aperm;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1688 {
1689   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1690   Mat            A    = mat->A,B = mat->B;
1691   PetscErrorCode ierr;
1692   PetscReal      isend[5],irecv[5];
1693 
1694   PetscFunctionBegin;
1695   info->block_size = 1.0;
1696   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1697 
1698   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1699   isend[3] = info->memory;  isend[4] = info->mallocs;
1700 
1701   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1704   isend[3] += info->memory;  isend[4] += info->mallocs;
1705   if (flag == MAT_LOCAL) {
1706     info->nz_used      = isend[0];
1707     info->nz_allocated = isend[1];
1708     info->nz_unneeded  = isend[2];
1709     info->memory       = isend[3];
1710     info->mallocs      = isend[4];
1711   } else if (flag == MAT_GLOBAL_MAX) {
1712     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1713 
1714     info->nz_used      = irecv[0];
1715     info->nz_allocated = irecv[1];
1716     info->nz_unneeded  = irecv[2];
1717     info->memory       = irecv[3];
1718     info->mallocs      = irecv[4];
1719   } else if (flag == MAT_GLOBAL_SUM) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   }
1728   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1729   info->fill_ratio_needed = 0;
1730   info->factor_mallocs    = 0;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetOption_MPIAIJ"
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_STRUCTURALLY_SYMMETRIC:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_HERMITIAN:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_SYMMETRY_ETERNAL:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   default:
1789     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatGetRow_MPIAIJ"
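/*
   The returned row is the merge, by increasing global column number, of three pieces:
   off-diagonal (B) entries whose global column is below cstart, then the diagonal (A) entries
   shifted by cstart, then the remaining off-diagonal entries.  A typical caller pattern
   (standard MatGetRow()/MatRestoreRow() usage, shown only as a sketch):

       const PetscInt    *cols;
       const PetscScalar *vals;
       PetscInt          ncols;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/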
1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1797 {
1798   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1799   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1800   PetscErrorCode ierr;
1801   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1802   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1803   PetscInt       *cmap,*idx_p;
1804 
1805   PetscFunctionBegin;
1806   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1807   mat->getrowactive = PETSC_TRUE;
1808 
1809   if (!mat->rowvalues && (idx || v)) {
1810     /*
1811         allocate enough space to hold information from the longest row.
1812     */
1813     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1814     PetscInt   max = 1,tmp;
1815     for (i=0; i<matin->rmap->n; i++) {
1816       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1817       if (max < tmp) max = tmp;
1818     }
1819     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1820   }
1821 
1822   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1823   lrow = row - rstart;
1824 
1825   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1826   if (!v)   {pvA = 0; pvB = 0;}
1827   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1828   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1829   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1830   nztot = nzA + nzB;
1831 
1832   cmap = mat->garray;
1833   if (v  || idx) {
1834     if (nztot) {
1835       /* Sort by increasing column numbers, assuming A and B already sorted */
1836       PetscInt imark = -1;
1837       if (v) {
1838         *v = v_p = mat->rowvalues;
1839         for (i=0; i<nzB; i++) {
1840           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1841           else break;
1842         }
1843         imark = i;
1844         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1845         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1846       }
1847       if (idx) {
1848         *idx = idx_p = mat->rowindices;
1849         if (imark > -1) {
1850           for (i=0; i<imark; i++) {
1851             idx_p[i] = cmap[cworkB[i]];
1852           }
1853         } else {
1854           for (i=0; i<nzB; i++) {
1855             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1856             else break;
1857           }
1858           imark = i;
1859         }
1860         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1861         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1862       }
1863     } else {
1864       if (idx) *idx = 0;
1865       if (v)   *v   = 0;
1866     }
1867   }
1868   *nz  = nztot;
1869   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1877 {
1878   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1879 
1880   PetscFunctionBegin;
1881   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1882   aij->getrowactive = PETSC_FALSE;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatNorm_MPIAIJ"
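/*
   Norms computed below (over both the diagonal and off-diagonal blocks):

     NORM_FROBENIUS   sqrt( sum_{ij} |a_ij|^2 ), local sums of squares reduced with MPIU_SUM
     NORM_1           max_j sum_i |a_ij|   (largest column sum; local column sums are added
                      across processes, then the maximum is taken)
     NORM_INFINITY    max_i sum_j |a_ij|   (largest row sum, reduced with MPIU_MAX)

   The 2-norm is not supported.
*/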
1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1889 {
1890   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1891   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1892   PetscErrorCode ierr;
1893   PetscInt       i,j,cstart = mat->cmap->rstart;
1894   PetscReal      sum = 0.0;
1895   MatScalar      *v;
1896 
1897   PetscFunctionBegin;
1898   if (aij->size == 1) {
1899     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1900   } else {
1901     if (type == NORM_FROBENIUS) {
1902       v = amat->a;
1903       for (i=0; i<amat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       v = bmat->a;
1907       for (i=0; i<bmat->nz; i++) {
1908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1909       }
1910       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911       *norm = PetscSqrtReal(*norm);
1912     } else if (type == NORM_1) { /* max column norm */
1913       PetscReal *tmp,*tmp2;
1914       PetscInt  *jj,*garray = aij->garray;
1915       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1916       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1917       *norm = 0.0;
1918       v     = amat->a; jj = amat->j;
1919       for (j=0; j<amat->nz; j++) {
1920         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1921       }
1922       v = bmat->a; jj = bmat->j;
1923       for (j=0; j<bmat->nz; j++) {
1924         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1925       }
1926       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927       for (j=0; j<mat->cmap->N; j++) {
1928         if (tmp2[j] > *norm) *norm = tmp2[j];
1929       }
1930       ierr = PetscFree(tmp);CHKERRQ(ierr);
1931       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1932     } else if (type == NORM_INFINITY) { /* max row norm */
1933       PetscReal ntemp = 0.0;
1934       for (j=0; j<aij->A->rmap->n; j++) {
1935         v   = amat->a + amat->i[j];
1936         sum = 0.0;
1937         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         v = bmat->a + bmat->i[j];
1941         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1942           sum += PetscAbsScalar(*v); v++;
1943         }
1944         if (sum > ntemp) ntemp = sum;
1945       }
1946       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1947     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatTranspose_MPIAIJ"
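/*
   For MAT_INITIAL_MATRIX the preallocation of the transpose is computed first: d_nnz[j] counts
   how often local column j appears in the diagonal block, while the off-diagonal column counts
   (g_nnz) are summed onto their owning processes with a PetscSF reduction to give o_nnz.  The
   entries are then inserted with MatSetValues() using swapped row/column arguments, so each
   row of A becomes a column of B.
*/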
1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1955 {
1956   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1957   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1958   PetscErrorCode ierr;
1959   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1960   PetscInt       cstart = A->cmap->rstart,ncol;
1961   Mat            B;
1962   MatScalar      *array;
1963 
1964   PetscFunctionBegin;
1965   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1966 
1967   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1968   ai = Aloc->i; aj = Aloc->j;
1969   bi = Bloc->i; bj = Bloc->j;
1970   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1971     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1972     PetscSFNode          *oloc;
1973     PETSC_UNUSED PetscSF sf;
1974 
1975     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1976     /* compute d_nnz for preallocation */
1977     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1978     for (i=0; i<ai[ma]; i++) {
1979       d_nnz[aj[i]]++;
1980       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1981     }
1982     /* compute local off-diagonal contributions */
1983     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1984     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1985     /* map those to global */
1986     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1987     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1988     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1989     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1990     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1991     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1992     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1993 
1994     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1995     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1996     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1997     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1998     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1999     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2000   } else {
2001     B    = *matout;
2002     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2003     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2004   }
2005 
2006   /* copy over the A part */
2007   array = Aloc->a;
2008   row   = A->rmap->rstart;
2009   for (i=0; i<ma; i++) {
2010     ncol = ai[i+1]-ai[i];
2011     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2012     row++;
2013     array += ncol; aj += ncol;
2014   }
2015   aj = Aloc->j;
2016   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2017 
2018   /* copy over the B part */
2019   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2020   array = Bloc->a;
2021   row   = A->rmap->rstart;
2022   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2023   cols_tmp = cols;
2024   for (i=0; i<mb; i++) {
2025     ncol = bi[i+1]-bi[i];
2026     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2027     row++;
2028     array += ncol; cols_tmp += ncol;
2029   }
2030   ierr = PetscFree(cols);CHKERRQ(ierr);
2031 
2032   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2034   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2035     *matout = B;
2036   } else {
2037     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
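/*
   Computes mat = diag(ll) * mat * diag(rr).  The left scaling is purely local; for the right
   scaling the relevant entries of rr are scattered into aij->lvec so the off-diagonal block
   can be column-scaled, and that scatter is overlapped with scaling the diagonal block.
*/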
2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2045 {
2046   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2047   Mat            a    = aij->A,b = aij->B;
2048   PetscErrorCode ierr;
2049   PetscInt       s1,s2,s3;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2053   if (rr) {
2054     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2055     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2056     /* Overlap communication with computation. */
2057     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2058   }
2059   if (ll) {
2060     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2061     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2062     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2063   }
2064   /* scale the diagonal block */
2065   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2066 
2067   if (rr) {
2068     /* Do a scatter end and then right scale the off-diagonal block */
2069     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2078 {
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatEqual_MPIAIJ"
2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2090 {
2091   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2092   Mat            a,b,c,d;
2093   PetscBool      flg;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   a = matA->A; b = matA->B;
2098   c = matB->A; d = matB->B;
2099 
2100   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2101   if (flg) {
2102     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2103   }
2104   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatCopy_MPIAIJ"
2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2111 {
2112   PetscErrorCode ierr;
2113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2114   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2115 
2116   PetscFunctionBegin;
2117   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2118   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2119     /* Because of the column compression in the off-process part of the matrix a->B,
2120        the number of columns in a->B and b->B may differ, hence we cannot call
2121        MatCopy() directly on the two parts. If need be, a copy more efficient than
2122        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2123        and then copying the submatrices */
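    /* For example, a->B may hold global columns {3,7} (compressed to 2 local columns) while
       b->B holds {3,7,9} (compressed to 3), so the compressed local column indices of the two
       off-diagonal blocks are not directly comparable. */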
2124     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2125   } else {
2126     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2127     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatSetUp_MPIAIJ"
2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2135 {
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /*
2144    Computes the number of nonzeros per row needed for preallocation when X and Y
2145    have different nonzero structure.
2146 */
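/*
   Example: if row i of X has global columns {1,4,7} and row i of Y has {2,4}, the merged
   (union) pattern is {1,2,4,7}, so nnz[i] = 4; duplicates (here column 4) are counted once.
*/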
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2150 {
2151   PetscInt       i,j,k,nzx,nzy;
2152 
2153   PetscFunctionBegin;
2154   /* Set the number of nonzeros in the new matrix */
2155   for (i=0; i<m; i++) {
2156     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2157     nzx = xi[i+1] - xi[i];
2158     nzy = yi[i+1] - yi[i];
2159     nnz[i] = 0;
2160     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2161       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2162       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2163       nnz[i]++;
2164     }
2165     for (; k<nzy; k++) nnz[i]++;
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2174 {
2175   PetscErrorCode ierr;
2176   PetscInt       m = Y->rmap->N;
2177   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2178   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2179 
2180   PetscFunctionBegin;
2181   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
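/*
   Y = a*X + Y.  For SAME_NONZERO_PATTERN the update is a single BLAS axpy on the stored values
   of each block; for SUBSET_NONZERO_PATTERN a mapping (xtoy) from X's off-diagonal nonzeros
   into Y's is built once and reused; otherwise a new matrix with the merged nonzero pattern is
   preallocated, Y = a*X + Y is formed into it, and Y's header is replaced by the result.
*/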
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ*)xx->B->data;
2211     y = (Mat_SeqAIJ*)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2223   } else {
2224     Mat      B;
2225     PetscInt *nnz_d,*nnz_o;
2226     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2227     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2228     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2229     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2230     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2231     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2232     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #if defined(PETSC_HAVE_PBGL)
2290 
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map.hpp>
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306   namespace petsc = boost::distributed::petsc;
2307 
2308   namespace graph_dist = boost::graph::distributed;
2309   using boost::graph::distributed::ilu_default::process_group_type;
2310   using boost::graph::ilu_permuted;
2311 
2312   PetscBool      row_identity, col_identity;
2313   PetscContainer c;
2314   PetscInt       m, n, M, N;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type& level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2338   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 
2342   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2343   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2344   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2345   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364   namespace graph_dist = boost::graph::distributed;
2365 
2366   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2367   lgraph_type    *lgraph_p;
2368   PetscContainer c;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2373   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2374   ierr = VecCopy(b, x);CHKERRQ(ierr);
2375 
2376   PetscScalar *array_x;
2377   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378   PetscInt sx;
2379   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380 
2381   PetscScalar *array_b;
2382   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383   PetscInt sb;
2384   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385 
2386   lgraph_type& level_graph = *lgraph_p;
2387   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2388 
2389   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2391   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2392 
2393   typedef boost::iterator_property_map<array_ref_type::iterator,
2394                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2395   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2396   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397 
2398   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
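/*
   Builds, on each of nsubcomm subcommunicators, a redundant copy of the whole matrix: a
   contiguous PetscSubcomm is created if none is supplied, each process extracts the rows it
   will own in the redundant matrix (with all columns) as a sequential matrix via
   MatGetSubMatrices(), and the sequential pieces are concatenated over the subcommunicator
   with MatCreateMPIMatConcatenateSeqMat().
*/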
2405 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2406 {
2407   PetscErrorCode ierr;
2408   MPI_Comm       comm;
2409   PetscMPIInt    size;
2410   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N,bs=mat->rmap->bs;
2411   Mat_Redundant  *redund=NULL;
2412   PetscSubcomm   psubcomm=NULL;
2413   MPI_Comm       subcomm_in=subcomm;
2414   Mat            *matseq;
2415   IS             isrow,iscol;
2416   PetscBool      newsubcomm=PETSC_FALSE;
2417 
2418   PetscFunctionBegin;
2419   if (subcomm_in == MPI_COMM_NULL && reuse == MAT_INITIAL_MATRIX) { /* get subcomm if user does not provide subcomm */
2420     /* create psubcomm, then get subcomm */
2421     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2422     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2423     if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2424 
2425     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2426     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2427     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2428     ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2429     ierr = PetscCommDuplicate(psubcomm->comm,&subcomm,NULL);CHKERRQ(ierr);
2430     newsubcomm = PETSC_TRUE;
2431     ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr);
2432   }
2433 
2434   /* get isrow, iscol and a local sequential matrix matseq[0] */
2435   if (reuse == MAT_INITIAL_MATRIX) {
2436     mloc_sub = PETSC_DECIDE;
2437     ierr = PetscSplitOwnershipBlock(subcomm,bs,&mloc_sub,&M);CHKERRQ(ierr);
2438     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2439     rstart = rend - mloc_sub;
2440     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2441     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2442   } else { /* reuse == MAT_REUSE_MATRIX */
2443     /* retrieve subcomm */
2444     ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2445     redund = (*matredundant)->redundant;
2446     isrow  = redund->isrow;
2447     iscol  = redund->iscol;
2448     matseq = redund->matseq;
2449   }
2450   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2451 
2452   /* get matredundant over subcomm */
2453   ierr = MatCreateMPIMatConcatenateSeqMat(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2454 
2455   if (reuse == MAT_INITIAL_MATRIX) {
2456     /* create a supporting struct and attach it to C for reuse */
2457     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2458     (*matredundant)->redundant = redund;
2459     redund->isrow              = isrow;
2460     redund->iscol              = iscol;
2461     redund->matseq             = matseq;
2462     if (newsubcomm) {
2463       redund->subcomm          = subcomm;
2464     } else {
2465       redund->subcomm          = MPI_COMM_NULL;
2466     }
2467   }
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2473 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2476   PetscErrorCode ierr;
2477   PetscInt       i,*idxb = 0;
2478   PetscScalar    *va,*vb;
2479   Vec            vtmp;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2483   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2484   if (idx) {
2485     for (i=0; i<A->rmap->n; i++) {
2486       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2487     }
2488   }
2489 
2490   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2491   if (idx) {
2492     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2493   }
2494   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2495   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2496 
2497   for (i=0; i<A->rmap->n; i++) {
2498     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2499       va[i] = vb[i];
2500       if (idx) idx[i] = a->garray[idxb[i]];
2501     }
2502   }
2503 
2504   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2506   ierr = PetscFree(idxb);CHKERRQ(ierr);
2507   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2513 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2514 {
2515   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2516   PetscErrorCode ierr;
2517   PetscInt       i,*idxb = 0;
2518   PetscScalar    *va,*vb;
2519   Vec            vtmp;
2520 
2521   PetscFunctionBegin;
2522   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2523   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2524   if (idx) {
2525     for (i=0; i<A->rmap->n; i++) {
2526       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2527     }
2528   }
2529 
2530   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2531   if (idx) {
2532     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2533   }
2534   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2535   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2536 
2537   for (i=0; i<A->rmap->n; i++) {
2538     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2539       va[i] = vb[i];
2540       if (idx) idx[i] = a->garray[idxb[i]];
2541     }
2542   }
2543 
2544   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2545   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2546   ierr = PetscFree(idxb);CHKERRQ(ierr);
2547   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2553 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2554 {
2555   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2556   PetscInt       n      = A->rmap->n;
2557   PetscInt       cstart = A->cmap->rstart;
2558   PetscInt       *cmap  = mat->garray;
2559   PetscInt       *diagIdx, *offdiagIdx;
2560   Vec            diagV, offdiagV;
2561   PetscScalar    *a, *diagA, *offdiagA;
2562   PetscInt       r;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2567   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2568   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2569   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2570   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2571   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2572   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2573   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2574   for (r = 0; r < n; ++r) {
2575     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2576       a[r]   = diagA[r];
2577       idx[r] = cstart + diagIdx[r];
2578     } else {
2579       a[r]   = offdiagA[r];
2580       idx[r] = cmap[offdiagIdx[r]];
2581     }
2582   }
2583   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2584   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2585   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2586   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2587   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2588   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 #undef __FUNCT__
2593 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2594 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2595 {
2596   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2597   PetscInt       n      = A->rmap->n;
2598   PetscInt       cstart = A->cmap->rstart;
2599   PetscInt       *cmap  = mat->garray;
2600   PetscInt       *diagIdx, *offdiagIdx;
2601   Vec            diagV, offdiagV;
2602   PetscScalar    *a, *diagA, *offdiagA;
2603   PetscInt       r;
2604   PetscErrorCode ierr;
2605 
2606   PetscFunctionBegin;
2607   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2608   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2609   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2610   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2611   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2612   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2613   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2614   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2615   for (r = 0; r < n; ++r) {
2616     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2617       a[r]   = diagA[r];
2618       idx[r] = cstart + diagIdx[r];
2619     } else {
2620       a[r]   = offdiagA[r];
2621       idx[r] = cmap[offdiagIdx[r]];
2622     }
2623   }
2624   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2625   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2626   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2627   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2628   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2629   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2635 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2636 {
2637   PetscErrorCode ierr;
2638   Mat            *dummy;
2639 
2640   PetscFunctionBegin;
2641   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2642   *newmat = *dummy;
2643   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2649 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2652   PetscErrorCode ierr;
2653 
2654   PetscFunctionBegin;
2655   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2661 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2662 {
2663   PetscErrorCode ierr;
2664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2668   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2669   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2670   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 /* -------------------------------------------------------------------*/
2675 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2676                                        MatGetRow_MPIAIJ,
2677                                        MatRestoreRow_MPIAIJ,
2678                                        MatMult_MPIAIJ,
2679                                 /* 4*/ MatMultAdd_MPIAIJ,
2680                                        MatMultTranspose_MPIAIJ,
2681                                        MatMultTransposeAdd_MPIAIJ,
2682 #if defined(PETSC_HAVE_PBGL)
2683                                        MatSolve_MPIAIJ,
2684 #else
2685                                        0,
2686 #endif
2687                                        0,
2688                                        0,
2689                                 /*10*/ 0,
2690                                        0,
2691                                        0,
2692                                        MatSOR_MPIAIJ,
2693                                        MatTranspose_MPIAIJ,
2694                                 /*15*/ MatGetInfo_MPIAIJ,
2695                                        MatEqual_MPIAIJ,
2696                                        MatGetDiagonal_MPIAIJ,
2697                                        MatDiagonalScale_MPIAIJ,
2698                                        MatNorm_MPIAIJ,
2699                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2700                                        MatAssemblyEnd_MPIAIJ,
2701                                        MatSetOption_MPIAIJ,
2702                                        MatZeroEntries_MPIAIJ,
2703                                 /*24*/ MatZeroRows_MPIAIJ,
2704                                        0,
2705 #if defined(PETSC_HAVE_PBGL)
2706                                        0,
2707 #else
2708                                        0,
2709 #endif
2710                                        0,
2711                                        0,
2712                                 /*29*/ MatSetUp_MPIAIJ,
2713 #if defined(PETSC_HAVE_PBGL)
2714                                        0,
2715 #else
2716                                        0,
2717 #endif
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*34*/ MatDuplicate_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                 /*39*/ MatAXPY_MPIAIJ,
2727                                        MatGetSubMatrices_MPIAIJ,
2728                                        MatIncreaseOverlap_MPIAIJ,
2729                                        MatGetValues_MPIAIJ,
2730                                        MatCopy_MPIAIJ,
2731                                 /*44*/ MatGetRowMax_MPIAIJ,
2732                                        MatScale_MPIAIJ,
2733                                        0,
2734                                        MatDiagonalSet_MPIAIJ,
2735                                        MatZeroRowsColumns_MPIAIJ,
2736                                 /*49*/ MatSetRandom_MPIAIJ,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2742                                        0,
2743                                        MatSetUnfactored_MPIAIJ,
2744                                        MatPermute_MPIAIJ,
2745                                        0,
2746                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2747                                        MatDestroy_MPIAIJ,
2748                                        MatView_MPIAIJ,
2749                                        0,
2750                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2751                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2752                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2757                                        MatGetRowMinAbs_MPIAIJ,
2758                                        0,
2759                                        MatSetColoring_MPIAIJ,
2760                                        0,
2761                                        MatSetValuesAdifor_MPIAIJ,
2762                                 /*75*/ MatFDColoringApply_AIJ,
2763                                        0,
2764                                        0,
2765                                        0,
2766                                        MatFindZeroDiagonals_MPIAIJ,
2767                                 /*80*/ 0,
2768                                        0,
2769                                        0,
2770                                 /*83*/ MatLoad_MPIAIJ,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                        0,
2776                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2777                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2778                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2779                                        MatPtAP_MPIAIJ_MPIAIJ,
2780                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2781                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                        0,
2786                                 /*99*/ 0,
2787                                        0,
2788                                        0,
2789                                        MatConjugate_MPIAIJ,
2790                                        0,
2791                                 /*104*/MatSetValuesRow_MPIAIJ,
2792                                        MatRealPart_MPIAIJ,
2793                                        MatImaginaryPart_MPIAIJ,
2794                                        0,
2795                                        0,
2796                                 /*109*/0,
2797                                        MatGetRedundantMatrix_MPIAIJ,
2798                                        MatGetRowMin_MPIAIJ,
2799                                        0,
2800                                        0,
2801                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2802                                        0,
2803                                        0,
2804                                        0,
2805                                        0,
2806                                 /*119*/0,
2807                                        0,
2808                                        0,
2809                                        0,
2810                                        MatGetMultiProcBlock_MPIAIJ,
2811                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2812                                        MatGetColumnNorms_MPIAIJ,
2813                                        MatInvertBlockDiagonal_MPIAIJ,
2814                                        0,
2815                                        MatGetSubMatricesParallel_MPIAIJ,
2816                                 /*129*/0,
2817                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2818                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2819                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2820                                        0,
2821                                 /*134*/0,
2822                                        0,
2823                                        0,
2824                                        0,
2825                                        0,
2826                                 /*139*/0,
2827                                        0,
2828                                        0,
2829                                        MatFDColoringSetUp_MPIXAIJ,
2830                                        0,
2831                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2832 };
2833 
2834 /* ----------------------------------------------------------------------------------------*/
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2838 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2839 {
2840   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2841   PetscErrorCode ierr;
2842 
2843   PetscFunctionBegin;
2844   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2845   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2851 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2852 {
2853   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2854   PetscErrorCode ierr;
2855 
2856   PetscFunctionBegin;
2857   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2858   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 #undef __FUNCT__
2863 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2864 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2865 {
2866   Mat_MPIAIJ     *b;
2867   PetscErrorCode ierr;
2868 
2869   PetscFunctionBegin;
2870   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2871   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2872   b = (Mat_MPIAIJ*)B->data;
2873 
2874   if (!B->preallocated) {
2875     /* Explicitly create 2 MATSEQAIJ matrices. */
2876     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2877     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2878     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2879     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2880     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2881     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2882     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2883     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2884     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2885     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2886   }
2887 
2888   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2889   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2890   B->preallocated = PETSC_TRUE;
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2896 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2897 {
2898   Mat            mat;
2899   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2900   PetscErrorCode ierr;
2901 
2902   PetscFunctionBegin;
2903   *newmat = 0;
2904   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2905   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2906   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2907   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2908   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2909   a       = (Mat_MPIAIJ*)mat->data;
2910 
2911   mat->factortype   = matin->factortype;
2912   mat->assembled    = PETSC_TRUE;
2913   mat->insertmode   = NOT_SET_VALUES;
2914   mat->preallocated = PETSC_TRUE;
2915 
2916   a->size         = oldmat->size;
2917   a->rank         = oldmat->rank;
2918   a->donotstash   = oldmat->donotstash;
2919   a->roworiented  = oldmat->roworiented;
2920   a->rowindices   = 0;
2921   a->rowvalues    = 0;
2922   a->getrowactive = PETSC_FALSE;
2923 
2924   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2925   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2926 
2927   if (oldmat->colmap) {
2928 #if defined(PETSC_USE_CTABLE)
2929     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2930 #else
2931     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2932     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2933     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2934 #endif
2935   } else a->colmap = 0;
2936   if (oldmat->garray) {
2937     PetscInt len;
2938     len  = oldmat->B->cmap->n;
2939     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2940     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2941     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2942   } else a->garray = 0;
2943 
2944   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2945   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2946   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2947   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2948   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2949   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2950   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2951   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2952   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2953   *newmat = mat;
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatLoad_MPIAIJ"
2961 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2962 {
2963   PetscScalar    *vals,*svals;
2964   MPI_Comm       comm;
2965   PetscErrorCode ierr;
2966   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2967   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2968   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2969   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2970   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2971   int            fd;
2972   PetscInt       bs = 1;
2973 
2974   PetscFunctionBegin;
2975   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2976   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2977   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2978   if (!rank) {
2979     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2980     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2981     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2982   }
2983 
2984   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2985   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2986   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2987 
2988   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2989 
2990   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2991   M    = header[1]; N = header[2];
2992   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2993   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2994   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2995 
2996   /* If global sizes are set, check if they are consistent with that given in the file */
2997   if (sizesset) {
2998     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2999   }
3000   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3001   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3002 
3003   /* determine ownership of all (block) rows */
3004   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3005   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3006   else m = newMat->rmap->n; /* Set by user */
3007 
3008   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3009   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3010 
3011   /* First process needs enough room for process with most rows */
3012   if (!rank) {
3013     mmax = rowners[1];
3014     for (i=2; i<=size; i++) {
3015       mmax = PetscMax(mmax, rowners[i]);
3016     }
3017   } else mmax = -1;             /* unused, but compilers complain */
3018 
3019   rowners[0] = 0;
3020   for (i=2; i<=size; i++) {
3021     rowners[i] += rowners[i-1];
3022   }
3023   rstart = rowners[rank];
3024   rend   = rowners[rank+1];
3025 
3026   /* distribute row lengths to all processors */
3027   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3028   if (!rank) {
3029     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3030     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3031     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3032     for (j=0; j<m; j++) {
3033       procsnz[0] += ourlens[j];
3034     }
3035     for (i=1; i<size; i++) {
3036       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3037       /* calculate the number of nonzeros on each processor */
3038       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3039         procsnz[i] += rowlengths[j];
3040       }
3041       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3042     }
3043     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3044   } else {
3045     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3046   }
3047 
3048   if (!rank) {
3049     /* determine max buffer needed and allocate it */
3050     maxnz = 0;
3051     for (i=0; i<size; i++) {
3052       maxnz = PetscMax(maxnz,procsnz[i]);
3053     }
3054     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3055 
3056     /* read in my part of the matrix column indices  */
3057     nz   = procsnz[0];
3058     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3059     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3060 
3061     /* read in everyone else's part and ship it off */
3062     for (i=1; i<size; i++) {
3063       nz   = procsnz[i];
3064       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3065       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3066     }
3067     ierr = PetscFree(cols);CHKERRQ(ierr);
3068   } else {
3069     /* determine buffer space needed for message */
3070     nz = 0;
3071     for (i=0; i<m; i++) {
3072       nz += ourlens[i];
3073     }
3074     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3075 
3076     /* receive message of column indices*/
3077     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3078   }
3079 
3080   /* determine column ownership if matrix is not square */
3081   if (N != M) {
3082     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3083     else n = newMat->cmap->n;
3084     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3085     cstart = cend - n;
3086   } else {
3087     cstart = rstart;
3088     cend   = rend;
3089     n      = cend - cstart;
3090   }
3091 
3092   /* loop over local rows, determining number of off diagonal entries */
3093   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3094   jj   = 0;
3095   for (i=0; i<m; i++) {
3096     for (j=0; j<ourlens[i]; j++) {
3097       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3098       jj++;
3099     }
3100   }
3101 
3102   for (i=0; i<m; i++) {
3103     ourlens[i] -= offlens[i];
3104   }
3105   if (!sizesset) {
3106     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3107   }
3108 
3109   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3110 
3111   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3112 
3113   for (i=0; i<m; i++) {
3114     ourlens[i] += offlens[i];
3115   }
3116 
3117   if (!rank) {
3118     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3119 
3120     /* read in my part of the matrix numerical values  */
3121     nz   = procsnz[0];
3122     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3123 
3124     /* insert into matrix */
3125     jj      = rstart;
3126     smycols = mycols;
3127     svals   = vals;
3128     for (i=0; i<m; i++) {
3129       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3130       smycols += ourlens[i];
3131       svals   += ourlens[i];
3132       jj++;
3133     }
3134 
3135     /* read in other processors and ship out */
3136     for (i=1; i<size; i++) {
3137       nz   = procsnz[i];
3138       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3139       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3140     }
3141     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3142   } else {
3143     /* receive numeric values */
3144     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3145 
3146     /* receive message of values*/
3147     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3148 
3149     /* insert into matrix */
3150     jj      = rstart;
3151     smycols = mycols;
3152     svals   = vals;
3153     for (i=0; i<m; i++) {
3154       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3155       smycols += ourlens[i];
3156       svals   += ourlens[i];
3157       jj++;
3158     }
3159   }
3160   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3161   ierr = PetscFree(vals);CHKERRQ(ierr);
3162   ierr = PetscFree(mycols);CHKERRQ(ierr);
3163   ierr = PetscFree(rowners);CHKERRQ(ierr);
3164   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 #undef __FUNCT__
3170 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3171 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3172 {
3173   PetscErrorCode ierr;
3174   IS             iscol_local;
3175   PetscInt       csize;
3176 
3177   PetscFunctionBegin;
3178   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3179   if (call == MAT_REUSE_MATRIX) {
3180     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3181     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3182   } else {
3183     PetscInt cbs;
3184     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3185     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3186     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3187   }
3188   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3189   if (call == MAT_INITIAL_MATRIX) {
3190     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3191     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3192   }
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3197 #undef __FUNCT__
3198 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3199 /*
3200     Not great since it makes two copies of the submatrix: first a SeqAIJ
3201   from the local rows, and then the end result by concatenating the local
3202   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3203 
3204   Note: This requires a sequential iscol with all indices.
3205 */
3206 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3207 {
3208   PetscErrorCode ierr;
3209   PetscMPIInt    rank,size;
3210   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3211   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3212   PetscBool      allcolumns, colflag;
3213   Mat            M,Mreuse;
3214   MatScalar      *vwork,*aa;
3215   MPI_Comm       comm;
3216   Mat_SeqAIJ     *aij;
3217 
3218   PetscFunctionBegin;
3219   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3220   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3222 
3223   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3224   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3225   if (colflag && ncol == mat->cmap->N) {
3226     allcolumns = PETSC_TRUE;
3227   } else {
3228     allcolumns = PETSC_FALSE;
3229   }
3230   if (call ==  MAT_REUSE_MATRIX) {
3231     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3232     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3233     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3234   } else {
3235     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3236   }
3237 
3238   /*
3239       m - number of local rows
3240       n - number of columns (same on all processors)
3241       rstart - first row in new global matrix generated
3242   */
3243   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3244   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3245   if (call == MAT_INITIAL_MATRIX) {
3246     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3247     ii  = aij->i;
3248     jj  = aij->j;
3249 
3250     /*
3251         Determine the number of non-zeros in the diagonal and off-diagonal
3252         portions of the matrix in order to do correct preallocation
3253     */
3254 
3255     /* first get start and end of "diagonal" columns */
3256     if (csize == PETSC_DECIDE) {
3257       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3258       if (mglobal == n) { /* square matrix */
3259         nlocal = m;
3260       } else {
3261         nlocal = n/size + ((n % size) > rank);
3262       }
3263     } else {
3264       nlocal = csize;
3265     }
3266     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3267     rstart = rend - nlocal;
3268     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3269 
3270     /* next, compute all the lengths */
3271     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3272     olens = dlens + m;
3273     for (i=0; i<m; i++) {
3274       jend = ii[i+1] - ii[i];
3275       olen = 0;
3276       dlen = 0;
3277       for (j=0; j<jend; j++) {
3278         if (*jj < rstart || *jj >= rend) olen++;
3279         else dlen++;
3280         jj++;
3281       }
3282       olens[i] = olen;
3283       dlens[i] = dlen;
3284     }
3285     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3286     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3287     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3288     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3289     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3290     ierr = PetscFree(dlens);CHKERRQ(ierr);
3291   } else {
3292     PetscInt ml,nl;
3293 
3294     M    = *newmat;
3295     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3296     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3297     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3298     /*
3299          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3300        rather than the slower MatSetValues().
3301     */
3302     M->was_assembled = PETSC_TRUE;
3303     M->assembled     = PETSC_FALSE;
3304   }
3305   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3306   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3307   ii   = aij->i;
3308   jj   = aij->j;
3309   aa   = aij->a;
3310   for (i=0; i<m; i++) {
3311     row   = rstart + i;
3312     nz    = ii[i+1] - ii[i];
3313     cwork = jj;     jj += nz;
3314     vwork = aa;     aa += nz;
3315     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3316   }
3317 
3318   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3319   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3320   *newmat = M;
3321 
3322   /* save submatrix used in processor for next request */
3323   if (call ==  MAT_INITIAL_MATRIX) {
3324     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3325     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3326   }
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 #undef __FUNCT__
3331 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3332 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3333 {
3334   PetscInt       m,cstart, cend,j,nnz,i,d;
3335   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3336   const PetscInt *JJ;
3337   PetscScalar    *values;
3338   PetscErrorCode ierr;
3339 
3340   PetscFunctionBegin;
3341   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3342 
3343   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3344   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3345   m      = B->rmap->n;
3346   cstart = B->cmap->rstart;
3347   cend   = B->cmap->rend;
3348   rstart = B->rmap->rstart;
3349 
3350   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3351 
3352 #if defined(PETSC_USE_DEBUG)
3353   for (i=0; i<m; i++) {
3354     nnz = Ii[i+1]- Ii[i];
3355     JJ  = J + Ii[i];
3356     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3357     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3358     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3359   }
3360 #endif
3361 
3362   for (i=0; i<m; i++) {
3363     nnz     = Ii[i+1]- Ii[i];
3364     JJ      = J + Ii[i];
3365     nnz_max = PetscMax(nnz_max,nnz);
3366     d       = 0;
3367     for (j=0; j<nnz; j++) {
3368       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3369     }
3370     d_nnz[i] = d;
3371     o_nnz[i] = nnz - d;
3372   }
3373   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3374   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3375 
3376   if (v) values = (PetscScalar*)v;
3377   else {
3378     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3379   }
3380 
3381   for (i=0; i<m; i++) {
3382     ii   = i + rstart;
3383     nnz  = Ii[i+1]- Ii[i];
3384     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3385   }
3386   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3388 
3389   if (!v) {
3390     ierr = PetscFree(values);CHKERRQ(ierr);
3391   }
3392   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 #undef __FUNCT__
3397 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3398 /*@
3399    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3400    (the default parallel PETSc format).
3401 
3402    Collective on MPI_Comm
3403 
3404    Input Parameters:
3405 +  B - the matrix
3406 .  i - the indices into j for the start of each local row (starts with zero)
3407 .  j - the column indices for each local row (starts with zero)
3408 -  v - optional values in the matrix
3409 
3410    Level: developer
3411 
3412    Notes:
3413        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3414      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3415      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3416 
3417        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3418 
3419        The format which is used for the sparse matrix input is equivalent to a
3420     row-major ordering, i.e. for the following matrix, the input data expected is
3421     as shown:
3422 
3423         1 0 0
3424         2 0 3     P0
3425        -------
3426         4 5 6     P1
3427 
3428      Process0 [P0]: rows_owned=[0,1]
3429         i =  {0,1,3}  [size = nrow+1  = 2+1]
3430         j =  {0,0,2}  [size = nz = 3]
3431         v =  {1,2,3}  [size = nz = 3]
3432 
3433      Process1 [P1]: rows_owned=[2]
3434         i =  {0,3}    [size = nrow+1  = 1+1]
3435         j =  {0,1,2}  [size = nz = 3]
3436         v =  {4,5,6}  [size = nz = 3]
3437 
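     As an illustrative sketch only (not part of the original manual page; B, ierr and
     the local arrays are assumed to be declared by the caller, and B must already be
     an MPIAIJ matrix with its local sizes set), process 0 above could pass its rows as:

.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
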
3438 .keywords: matrix, aij, compressed row, sparse, parallel
3439 
3440 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3441           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3442 @*/
3443 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3444 {
3445   PetscErrorCode ierr;
3446 
3447   PetscFunctionBegin;
3448   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 #undef __FUNCT__
3453 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3454 /*@C
3455    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3456    (the default parallel PETSc format).  For good matrix assembly performance
3457    the user should preallocate the matrix storage by setting the parameters
3458    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3459    performance can be increased by more than a factor of 50.
3460 
3461    Collective on MPI_Comm
3462 
3463    Input Parameters:
3464 +  B - the matrix
3465 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3466            (same value is used for all local rows)
3467 .  d_nnz - array containing the number of nonzeros in the various rows of the
3468            DIAGONAL portion of the local submatrix (possibly different for each row)
3469            or NULL, if d_nz is used to specify the nonzero structure.
3470            The size of this array is equal to the number of local rows, i.e 'm'.
3471            For matrices that will be factored, you must leave room for (and set)
3472            the diagonal entry even if it is zero.
3473 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3474            submatrix (same value is used for all local rows).
3475 -  o_nnz - array containing the number of nonzeros in the various rows of the
3476            OFF-DIAGONAL portion of the local submatrix (possibly different for
3477            each row) or NULL, if o_nz is used to specify the nonzero
3478            structure. The size of this array is equal to the number
3479            of local rows, i.e 'm'.
3480 
3481    If the *_nnz parameter is given then the *_nz parameter is ignored
3482 
3483    The AIJ format (also called the Yale sparse matrix format or
3484    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3485    storage.  The stored row and column indices begin with zero.
3486    See Users-Manual: ch_mat for details.
3487 
3488    The parallel matrix is partitioned such that the first m0 rows belong to
3489    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3490    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3491 
3492    The DIAGONAL portion of the local submatrix of a processor can be defined
3493    as the submatrix which is obtained by extracting the part corresponding to
3494    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3495    first row that belongs to the processor, r2 is the last row belonging to
3496    this processor, and c1-c2 is the range of indices of the local part of a
3497    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3498    common case of a square matrix, the row and column ranges are the same and
3499    the DIAGONAL part is also square. The remaining portion of the local
3500    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3501 
3502    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3503 
3504    You can call MatGetInfo() to get information on how effective the preallocation was;
3505    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3506    You can also run with the option -info and look for messages with the string
3507    malloc in them to see if additional memory allocation was needed.
3508 
3509    Example usage:
3510 
3511    Consider the following 8x8 matrix with 34 non-zero values, that is
3512    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3513    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3514    as follows:
3515 
3516 .vb
3517             1  2  0  |  0  3  0  |  0  4
3518     Proc0   0  5  6  |  7  0  0  |  8  0
3519             9  0 10  | 11  0  0  | 12  0
3520     -------------------------------------
3521            13  0 14  | 15 16 17  |  0  0
3522     Proc1   0 18  0  | 19 20 21  |  0  0
3523             0  0  0  | 22 23  0  | 24  0
3524     -------------------------------------
3525     Proc2  25 26 27  |  0  0 28  | 29  0
3526            30  0  0  | 31 32 33  |  0 34
3527 .ve
3528 
3529    This can be represented as a collection of submatrices as:
3530 
3531 .vb
3532       A B C
3533       D E F
3534       G H I
3535 .ve
3536 
3537    Where the submatrices A,B,C are owned by proc0, D,E,F are
3538    owned by proc1, G,H,I are owned by proc2.
3539 
3540    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3541    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3542    The 'M','N' parameters are 8,8, and have the same values on all procs.
3543 
3544    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3545    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3546    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3547    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3548    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3549    matrix, ans [DF] as another SeqAIJ matrix.
3550 
3551    When d_nz, o_nz parameters are specified, d_nz storage elements are
3552    allocated for every row of the local diagonal submatrix, and o_nz
3553    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3554    One way to choose d_nz and o_nz is to use the max nonzeros per local
3555    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3556    In this case, the values of d_nz,o_nz are:
3557 .vb
3558      proc0 : d_nz = 2, o_nz = 2
3559      proc1 : d_nz = 3, o_nz = 2
3560      proc2 : d_nz = 1, o_nz = 4
3561 .ve
3562    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3563    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3564    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3565    34 values.
3566 
3567    When d_nnz, o_nnz parameters are specified, the storage is specified
3568    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3569    In the above case the values for d_nnz,o_nnz are:
3570 .vb
3571      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3572      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3573      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3574 .ve
3575    Here the space allocated is sum of all the above values i.e 34, and
3576    hence pre-allocation is perfect.
3577 
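   As a sketch (A and ierr are assumed to be declared by the caller, and A already set
   up as an MPIAIJ matrix with 3 local rows), process 0 would then preallocate with:

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
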
3578    Level: intermediate
3579 
3580 .keywords: matrix, aij, compressed row, sparse, parallel
3581 
3582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3583           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3584 @*/
3585 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3586 {
3587   PetscErrorCode ierr;
3588 
3589   PetscFunctionBegin;
3590   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3591   PetscValidType(B,1);
3592   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
3595 
3596 #undef __FUNCT__
3597 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3598 /*@
3599      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3600          CSR format the local rows.
3601 
3602    Collective on MPI_Comm
3603 
3604    Input Parameters:
3605 +  comm - MPI communicator
3606 .  m - number of local rows (Cannot be PETSC_DECIDE)
3607 .  n - This value should be the same as the local size used in creating the
3608        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3609        calculated if N is given) For square matrices n is almost always m.
3610 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3611 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3612 .   i - row indices
3613 .   j - column indices
3614 -   a - matrix values
3615 
3616    Output Parameter:
3617 .   mat - the matrix
3618 
3619    Level: intermediate
3620 
3621    Notes:
3622        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3623      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3624      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3625 
3626        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3627 
3628        The format which is used for the sparse matrix input is equivalent to a
3629     row-major ordering, i.e. for the following matrix, the input data expected is
3630     as shown:
3631 
3632         1 0 0
3633         2 0 3     P0
3634        -------
3635         4 5 6     P1
3636 
3637      Process0 [P0]: rows_owned=[0,1]
3638         i =  {0,1,3}  [size = nrow+1  = 2+1]
3639         j =  {0,0,2}  [size = nz = 3]
3640         v =  {1,2,3}  [size = nz = 3]
3641 
3642      Process1 [P1]: rows_owned=[2]
3643         i =  {0,3}    [size = nrow+1  = 1+1]
3644         j =  {0,1,2}  [size = nz = 3]
3645         v =  {4,5,6}  [size = nz = 3]
3646 
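     For illustration only (comm, A and ierr are assumed to be declared by the caller;
     every rank makes the same call with its own local arrays), process 1 above could
     create the matrix directly with:

.vb
      PetscInt    i[] = {0,3};
      PetscInt    j[] = {0,1,2};
      PetscScalar v[] = {4.0,5.0,6.0};
      ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
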
3647 .keywords: matrix, aij, compressed row, sparse, parallel
3648 
3649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3650           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3651 @*/
3652 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3653 {
3654   PetscErrorCode ierr;
3655 
3656   PetscFunctionBegin;
3657   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3658   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3659   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3660   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3661   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3662   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3663   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3664   PetscFunctionReturn(0);
3665 }
3666 
3667 #undef __FUNCT__
3668 #define __FUNCT__ "MatCreateAIJ"
3669 /*@C
3670    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3671    (the default parallel PETSc format).  For good matrix assembly performance
3672    the user should preallocate the matrix storage by setting the parameters
3673    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3674    performance can be increased by more than a factor of 50.
3675 
3676    Collective on MPI_Comm
3677 
3678    Input Parameters:
3679 +  comm - MPI communicator
3680 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3681            This value should be the same as the local size used in creating the
3682            y vector for the matrix-vector product y = Ax.
3683 .  n - This value should be the same as the local size used in creating the
3684        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3685        calculated if N is given) For square matrices n is almost always m.
3686 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3687 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3688 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3689            (same value is used for all local rows)
3690 .  d_nnz - array containing the number of nonzeros in the various rows of the
3691            DIAGONAL portion of the local submatrix (possibly different for each row)
3692            or NULL, if d_nz is used to specify the nonzero structure.
3693            The size of this array is equal to the number of local rows, i.e 'm'.
3694 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3695            submatrix (same value is used for all local rows).
3696 -  o_nnz - array containing the number of nonzeros in the various rows of the
3697            OFF-DIAGONAL portion of the local submatrix (possibly different for
3698            each row) or NULL, if o_nz is used to specify the nonzero
3699            structure. The size of this array is equal to the number
3700            of local rows, i.e 'm'.
3701 
3702    Output Parameter:
3703 .  A - the matrix
3704 
3705    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3706    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3707    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3708 
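   A minimal sketch of that recommended sequence (not part of the original manual page;
   comm, A, ierr, m, n, M, N, d_nnz and o_nnz are placeholders assumed to be provided
   by the caller):

.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
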
3709    Notes:
3710    If the *_nnz parameter is given then the *_nz parameter is ignored
3711 
3712    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3713    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3714    storage requirements for this matrix.
3715 
3716    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3717    processor then it must be used on all processors that share the object for
3718    that argument.
3719 
3720    The user MUST specify either the local or global matrix dimensions
3721    (possibly both).
3722 
3723    The parallel matrix is partitioned across processors such that the
3724    first m0 rows belong to process 0, the next m1 rows belong to
3725    process 1, the next m2 rows belong to process 2 etc.. where
3726    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3727    values corresponding to an [m x N] submatrix.
3728 
3729    The columns are logically partitioned with the n0 columns belonging
3730    to 0th partition, the next n1 columns belonging to the next
3731    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3732 
3733    The DIAGONAL portion of the local submatrix on any given processor
3734    is the submatrix corresponding to the rows and columns m,n
3735    corresponding to the given processor. i.e diagonal matrix on
3736    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3737    etc. The remaining portion of the local submatrix [m x (N-n)]
3738    constitutes the OFF-DIAGONAL portion. The example below better
3739    illustrates this concept.
3740 
3741    For a square global matrix we define each processor's diagonal portion
3742    to be its local rows and the corresponding columns (a square submatrix);
3743    each processor's off-diagonal portion encompasses the remainder of the
3744    local matrix (a rectangular submatrix).
3745 
3746    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3747 
3748    When calling this routine with a single process communicator, a matrix of
3749    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3750    type of communicator, use the construction mechanism:
3751      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3752 
3753    By default, this format uses inodes (identical nodes) when possible.
3754    We search for consecutive rows with the same nonzero structure, thereby
3755    reusing matrix information to achieve increased efficiency.
3756 
3757    Options Database Keys:
3758 +  -mat_no_inode  - Do not use inodes
3759 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3760 -  -mat_aij_oneindex - Internally use indexing starting at 1
3761         rather than 0.  Note that when calling MatSetValues(),
3762         the user still MUST index entries starting at 0!
3763 
3764 
3765    Example usage:
3766 
3767    Consider the following 8x8 matrix with 34 non-zero values, that is
3768    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3769    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3770    as follows:
3771 
3772 .vb
3773             1  2  0  |  0  3  0  |  0  4
3774     Proc0   0  5  6  |  7  0  0  |  8  0
3775             9  0 10  | 11  0  0  | 12  0
3776     -------------------------------------
3777            13  0 14  | 15 16 17  |  0  0
3778     Proc1   0 18  0  | 19 20 21  |  0  0
3779             0  0  0  | 22 23  0  | 24  0
3780     -------------------------------------
3781     Proc2  25 26 27  |  0  0 28  | 29  0
3782            30  0  0  | 31 32 33  |  0 34
3783 .ve
3784 
3785    This can be represented as a collection of submatrices as:
3786 
3787 .vb
3788       A B C
3789       D E F
3790       G H I
3791 .ve
3792 
3793    Where the submatrices A,B,C are owned by proc0, D,E,F are
3794    owned by proc1, G,H,I are owned by proc2.
3795 
3796    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3797    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3798    The 'M','N' parameters are 8,8, and have the same values on all procs.
3799 
3800    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3801    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3802    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3803    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3804    part as separate SeqAIJ matrices. For example, proc1 stores [E] as one SeqAIJ
3805    matrix and [DF] as another SeqAIJ matrix.
3806 
3807    When d_nz, o_nz parameters are specified, d_nz storage elements are
3808    allocated for every row of the local diagonal submatrix, and o_nz
3809    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3810    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3811    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3812    In this case, the values of d_nz,o_nz are:
3813 .vb
3814      proc0 : d_nz = 2, o_nz = 2
3815      proc1 : d_nz = 3, o_nz = 2
3816      proc2 : d_nz = 1, o_nz = 4
3817 .ve
3818    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3819    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3820    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3821    34 values.
3822 
3823    When d_nnz, o_nnz parameters are specified, the storage is specified
3824    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3825    In the above case the values for d_nnz,o_nnz are:
3826 .vb
3827      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3828      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3829      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3830 .ve
3831    Here the space allocated is the sum of all the above values, i.e., 34, so
3832    the preallocation is exact.
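
   As a minimal sketch (error checking omitted; not an excerpt from a PETSc example program),
   the exact preallocation above could be requested on proc1 with
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve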
3833 
3834    Level: intermediate
3835 
3836 .keywords: matrix, aij, compressed row, sparse, parallel
3837 
3838 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3839           MPIAIJ, MatCreateMPIAIJWithArrays()
3840 @*/
3841 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3842 {
3843   PetscErrorCode ierr;
3844   PetscMPIInt    size;
3845 
3846   PetscFunctionBegin;
3847   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3848   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3849   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3850   if (size > 1) {
3851     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3852     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3853   } else {
3854     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3855     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3856   }
3857   PetscFunctionReturn(0);
3858 }
3859 
3860 #undef __FUNCT__
3861 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3862 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3863 {
3864   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3865 
3866   PetscFunctionBegin;
3867   if (Ad)     *Ad     = a->A;
3868   if (Ao)     *Ao     = a->B;
3869   if (colmap) *colmap = a->garray;
3870   PetscFunctionReturn(0);
3871 }
3872 
3873 #undef __FUNCT__
3874 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3875 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3876 {
3877   PetscErrorCode ierr;
3878   PetscInt       i;
3879   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3880 
3881   PetscFunctionBegin;
3882   if (coloring->ctype == IS_COLORING_GLOBAL) {
3883     ISColoringValue *allcolors,*colors;
3884     ISColoring      ocoloring;
3885 
3886     /* set coloring for diagonal portion */
3887     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3888 
3889     /* set coloring for off-diagonal portion */
3890     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3891     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3892     for (i=0; i<a->B->cmap->n; i++) {
3893       colors[i] = allcolors[a->garray[i]];
3894     }
3895     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3896     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3897     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3898     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3899   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3900     ISColoringValue *colors;
3901     PetscInt        *larray;
3902     ISColoring      ocoloring;
3903 
3904     /* set coloring for diagonal portion */
3905     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3906     for (i=0; i<a->A->cmap->n; i++) {
3907       larray[i] = i + A->cmap->rstart;
3908     }
3909     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3910     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3911     for (i=0; i<a->A->cmap->n; i++) {
3912       colors[i] = coloring->colors[larray[i]];
3913     }
3914     ierr = PetscFree(larray);CHKERRQ(ierr);
3915     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3916     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3917     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3918 
3919     /* set coloring for off-diagonal portion */
3920     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3921     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3922     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3923     for (i=0; i<a->B->cmap->n; i++) {
3924       colors[i] = coloring->colors[larray[i]];
3925     }
3926     ierr = PetscFree(larray);CHKERRQ(ierr);
3927     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3928     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3929     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3930   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3931   PetscFunctionReturn(0);
3932 }
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3936 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3937 {
3938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3939   PetscErrorCode ierr;
3940 
3941   PetscFunctionBegin;
3942   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3943   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3944   PetscFunctionReturn(0);
3945 }
3946 
3947 #undef __FUNCT__
3948 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
3949 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
3950 {
3951   PetscErrorCode ierr;
3952   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
3953   PetscInt       *indx;
3954 
3955   PetscFunctionBegin;
3956   /* This routine will ONLY return MPIAIJ type matrix */
3957   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3958   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3959   if (n == PETSC_DECIDE) {
3960     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3961   }
3962   /* Check sum(n) = N */
3963   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3964   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3965 
3966   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3967   rstart -= m;
3968 
3969   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3970   for (i=0; i<m; i++) {
3971     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3972     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3973     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3974   }
3975 
3976   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3977   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3978   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3979   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3980   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3981   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3982   PetscFunctionReturn(0);
3983 }
3984 
3985 #undef __FUNCT__
3986 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
3987 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
3988 {
3989   PetscErrorCode ierr;
3990   PetscInt       m,N,i,rstart,nnz,Ii;
3991   PetscInt       *indx;
3992   PetscScalar    *values;
3993 
3994   PetscFunctionBegin;
3995   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3996   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
3997   for (i=0; i<m; i++) {
3998     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3999     Ii   = i + rstart;
4000     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4001     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4002   }
4003   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4004   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4005   PetscFunctionReturn(0);
4006 }
4007 
4008 #undef __FUNCT__
4009 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
4010 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4011 {
4012   PetscErrorCode ierr;
4013   PetscMPIInt    size;
4014 
4015   PetscFunctionBegin;
4016   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4017   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4018   if (size == 1) {
4019     if (scall == MAT_INITIAL_MATRIX) {
4020       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4021     } else {
4022       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4023     }
4024   } else {
4025     if (scall == MAT_INITIAL_MATRIX) {
4026       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4027     }
4028     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4029   }
4030   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 #undef __FUNCT__
4035 #define __FUNCT__ "MatFileSplit"
4036 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4037 {
4038   PetscErrorCode    ierr;
4039   PetscMPIInt       rank;
4040   PetscInt          m,N,i,rstart,nnz;
4041   size_t            len;
4042   const PetscInt    *indx;
4043   PetscViewer       out;
4044   char              *name;
4045   Mat               B;
4046   const PetscScalar *values;
4047 
4048   PetscFunctionBegin;
4049   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4050   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4051   /* Should this be the type of the diagonal block of A? */
4052   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4053   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4054   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4055   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4056   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4057   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4058   for (i=0; i<m; i++) {
4059     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4060     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4061     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4062   }
4063   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4064   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4065 
4066   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4067   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4068   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4069   sprintf(name,"%s.%d",outfile,rank);
4070   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4071   ierr = PetscFree(name);CHKERRQ(ierr);
4072   ierr = MatView(B,out);CHKERRQ(ierr);
4073   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4074   ierr = MatDestroy(&B);CHKERRQ(ierr);
4075   PetscFunctionReturn(0);
4076 }
4077 
4078 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4079 #undef __FUNCT__
4080 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4081 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4082 {
4083   PetscErrorCode      ierr;
4084   Mat_Merge_SeqsToMPI *merge;
4085   PetscContainer      container;
4086 
4087   PetscFunctionBegin;
4088   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4089   if (container) {
4090     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4091     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4092     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4093     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4094     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4095     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4096     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4097     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4098     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4099     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4100     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4101     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4102     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4103     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4104     ierr = PetscFree(merge);CHKERRQ(ierr);
4105     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4106   }
4107   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4108   PetscFunctionReturn(0);
4109 }
4110 
4111 #include <../src/mat/utils/freespace.h>
4112 #include <petscbt.h>
4113 
4114 #undef __FUNCT__
4115 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4116 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4117 {
4118   PetscErrorCode      ierr;
4119   MPI_Comm            comm;
4120   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4121   PetscMPIInt         size,rank,taga,*len_s;
4122   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4123   PetscInt            proc,m;
4124   PetscInt            **buf_ri,**buf_rj;
4125   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4126   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4127   MPI_Request         *s_waits,*r_waits;
4128   MPI_Status          *status;
4129   MatScalar           *aa=a->a;
4130   MatScalar           **abuf_r,*ba_i;
4131   Mat_Merge_SeqsToMPI *merge;
4132   PetscContainer      container;
4133 
4134   PetscFunctionBegin;
4135   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4136   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4137 
4138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4140 
4141   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4142   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4143 
4144   bi     = merge->bi;
4145   bj     = merge->bj;
4146   buf_ri = merge->buf_ri;
4147   buf_rj = merge->buf_rj;
4148 
4149   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4150   owners = merge->rowmap->range;
4151   len_s  = merge->len_s;
4152 
4153   /* send and recv matrix values */
4154   /*-----------------------------*/
4155   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4156   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4157 
4158   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4159   for (proc=0,k=0; proc<size; proc++) {
4160     if (!len_s[proc]) continue;
4161     i    = owners[proc];
4162     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4163     k++;
4164   }
4165 
4166   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4167   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4168   ierr = PetscFree(status);CHKERRQ(ierr);
4169 
4170   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4171   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4172 
4173   /* insert mat values of mpimat */
4174   /*----------------------------*/
4175   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4176   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4177 
4178   for (k=0; k<merge->nrecv; k++) {
4179     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4180     nrows       = *(buf_ri_k[k]);
4181     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4182     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4183   }
4184 
4185   /* set values of ba */
4186   m = merge->rowmap->n;
4187   for (i=0; i<m; i++) {
4188     arow = owners[rank] + i;
4189     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4190     bnzi = bi[i+1] - bi[i];
4191     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4192 
4193     /* add local non-zero vals of this proc's seqmat into ba */
4194     anzi   = ai[arow+1] - ai[arow];
4195     aj     = a->j + ai[arow];
4196     aa     = a->a + ai[arow];
4197     nextaj = 0;
4198     for (j=0; nextaj<anzi; j++) {
4199       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4200         ba_i[j] += aa[nextaj++];
4201       }
4202     }
4203 
4204     /* add received vals into ba */
4205     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4206       /* i-th row */
4207       if (i == *nextrow[k]) {
4208         anzi   = *(nextai[k]+1) - *nextai[k];
4209         aj     = buf_rj[k] + *(nextai[k]);
4210         aa     = abuf_r[k] + *(nextai[k]);
4211         nextaj = 0;
4212         for (j=0; nextaj<anzi; j++) {
4213           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4214             ba_i[j] += aa[nextaj++];
4215           }
4216         }
4217         nextrow[k]++; nextai[k]++;
4218       }
4219     }
4220     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4221   }
4222   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4223   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4224 
4225   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4226   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4227   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4228   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4229   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4234 
4235 #undef __FUNCT__
4236 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4237 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4238 {
4239   PetscErrorCode      ierr;
4240   Mat                 B_mpi;
4241   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4242   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4243   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4244   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4245   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4246   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4247   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4248   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4249   MPI_Status          *status;
4250   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4251   PetscBT             lnkbt;
4252   Mat_Merge_SeqsToMPI *merge;
4253   PetscContainer      container;
4254 
4255   PetscFunctionBegin;
4256   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4257 
4258   /* make sure it is a PETSc comm */
4259   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4260   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4261   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4262 
4263   ierr = PetscNew(&merge);CHKERRQ(ierr);
4264   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4265 
4266   /* determine row ownership */
4267   /*---------------------------------------------------------*/
4268   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4269   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4270   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4271   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4272   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4273   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4274   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4275 
4276   m      = merge->rowmap->n;
4277   owners = merge->rowmap->range;
4278 
4279   /* determine the number of messages to send, their lengths */
4280   /*---------------------------------------------------------*/
4281   len_s = merge->len_s;
4282 
4283   len          = 0; /* length of buf_si[] */
4284   merge->nsend = 0;
4285   for (proc=0; proc<size; proc++) {
4286     len_si[proc] = 0;
4287     if (proc == rank) {
4288       len_s[proc] = 0;
4289     } else {
4290       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4291       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4292     }
4293     if (len_s[proc]) {
4294       merge->nsend++;
4295       nrows = 0;
4296       for (i=owners[proc]; i<owners[proc+1]; i++) {
4297         if (ai[i+1] > ai[i]) nrows++;
4298       }
4299       len_si[proc] = 2*(nrows+1);
4300       len         += len_si[proc];
4301     }
4302   }
4303 
4304   /* determine the number and length of messages to receive for ij-structure */
4305   /*-------------------------------------------------------------------------*/
4306   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4307   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4308 
4309   /* post the Irecv of j-structure */
4310   /*-------------------------------*/
4311   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4312   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4313 
4314   /* post the Isend of j-structure */
4315   /*--------------------------------*/
4316   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4317 
4318   for (proc=0, k=0; proc<size; proc++) {
4319     if (!len_s[proc]) continue;
4320     i    = owners[proc];
4321     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4322     k++;
4323   }
4324 
4325   /* receives and sends of j-structure are complete */
4326   /*------------------------------------------------*/
4327   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4328   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4329 
4330   /* send and recv i-structure */
4331   /*---------------------------*/
4332   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4333   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4334 
4335   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4336   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4337   for (proc=0,k=0; proc<size; proc++) {
4338     if (!len_s[proc]) continue;
4339     /* form outgoing message for i-structure:
4340          buf_si[0]:                 nrows to be sent
4341                [1:nrows]:           row index (global)
4342                [nrows+1:2*nrows+1]: i-structure index
4343     */
4344     /*-------------------------------------------*/
4345     nrows       = len_si[proc]/2 - 1;
4346     buf_si_i    = buf_si + nrows+1;
4347     buf_si[0]   = nrows;
4348     buf_si_i[0] = 0;
4349     nrows       = 0;
4350     for (i=owners[proc]; i<owners[proc+1]; i++) {
4351       anzi = ai[i+1] - ai[i];
4352       if (anzi) {
4353         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4354         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4355         nrows++;
4356       }
4357     }
4358     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4359     k++;
4360     buf_si += len_si[proc];
4361   }
4362 
4363   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4364   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4365 
4366   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4367   for (i=0; i<merge->nrecv; i++) {
4368     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4369   }
4370 
4371   ierr = PetscFree(len_si);CHKERRQ(ierr);
4372   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4373   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4374   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4375   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4376   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4377   ierr = PetscFree(status);CHKERRQ(ierr);
4378 
4379   /* compute a local seq matrix in each processor */
4380   /*----------------------------------------------*/
4381   /* allocate bi array and free space for accumulating nonzero column info */
4382   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4383   bi[0] = 0;
4384 
4385   /* create and initialize a linked list */
4386   nlnk = N+1;
4387   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4388 
4389   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4390   len  = ai[owners[rank+1]] - ai[owners[rank]];
4391   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4392 
4393   current_space = free_space;
4394 
4395   /* determine symbolic info for each local row */
4396   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4397 
4398   for (k=0; k<merge->nrecv; k++) {
4399     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4400     nrows       = *buf_ri_k[k];
4401     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4402     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4403   }
4404 
4405   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4406   len  = 0;
4407   for (i=0; i<m; i++) {
4408     bnzi = 0;
4409     /* add local non-zero cols of this proc's seqmat into lnk */
4410     arow  = owners[rank] + i;
4411     anzi  = ai[arow+1] - ai[arow];
4412     aj    = a->j + ai[arow];
4413     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4414     bnzi += nlnk;
4415     /* add received col data into lnk */
4416     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4417       if (i == *nextrow[k]) { /* i-th row */
4418         anzi  = *(nextai[k]+1) - *nextai[k];
4419         aj    = buf_rj[k] + *nextai[k];
4420         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4421         bnzi += nlnk;
4422         nextrow[k]++; nextai[k]++;
4423       }
4424     }
4425     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4426 
4427     /* if free space is not available, make more free space */
4428     if (current_space->local_remaining<bnzi) {
4429       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4430       nspacedouble++;
4431     }
4432     /* copy data into free space, then initialize lnk */
4433     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4434     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4435 
4436     current_space->array           += bnzi;
4437     current_space->local_used      += bnzi;
4438     current_space->local_remaining -= bnzi;
4439 
4440     bi[i+1] = bi[i] + bnzi;
4441   }
4442 
4443   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4444 
4445   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4446   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4447   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4448 
4449   /* create symbolic parallel matrix B_mpi */
4450   /*---------------------------------------*/
4451   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4452   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4453   if (n==PETSC_DECIDE) {
4454     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4455   } else {
4456     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4457   }
4458   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4459   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4460   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4461   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4462   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4463 
4464   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4465   B_mpi->assembled    = PETSC_FALSE;
4466   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4467   merge->bi           = bi;
4468   merge->bj           = bj;
4469   merge->buf_ri       = buf_ri;
4470   merge->buf_rj       = buf_rj;
4471   merge->coi          = NULL;
4472   merge->coj          = NULL;
4473   merge->owners_co    = NULL;
4474 
4475   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4476 
4477   /* attach the supporting struct to B_mpi for reuse */
4478   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4479   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4480   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4481   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4482   *mpimat = B_mpi;
4483 
4484   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4485   PetscFunctionReturn(0);
4486 }
4487 
4488 #undef __FUNCT__
4489 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4490 /*@C
4491       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4492                  matrices from each processor
4493 
4494     Collective on MPI_Comm
4495 
4496    Input Parameters:
4497 +    comm - the communicator the parallel matrix will live on
4498 .    seqmat - the input sequential matrix
4499 .    m - number of local rows (or PETSC_DECIDE)
4500 .    n - number of local columns (or PETSC_DECIDE)
4501 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4502 
4503    Output Parameter:
4504 .    mpimat - the parallel matrix generated
4505 
4506     Level: advanced
4507 
4508    Notes:
4509      The dimensions of the sequential matrix in each processor MUST be the same.
4510      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4511      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
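
     A minimal sketch of typical usage (error checking omitted; on reuse, only the numerical
     values of seqmat are assumed to have changed, not its nonzero pattern):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the values in seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve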
4512 @*/
4513 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4514 {
4515   PetscErrorCode ierr;
4516   PetscMPIInt    size;
4517 
4518   PetscFunctionBegin;
4519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4520   if (size == 1) {
4521     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4522     if (scall == MAT_INITIAL_MATRIX) {
4523       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4524     } else {
4525       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4526     }
4527     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4528     PetscFunctionReturn(0);
4529   }
4530   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4531   if (scall == MAT_INITIAL_MATRIX) {
4532     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4533   }
4534   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4535   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 #undef __FUNCT__
4540 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4541 /*@
4542      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4543           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4544           with MatGetSize()
4545 
4546     Not Collective
4547 
4548    Input Parameters:
4549 +    A - the matrix
4550 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4551 
4552    Output Parameter:
4553 .    A_loc - the local sequential matrix generated
4554 
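   A minimal usage sketch (error checking omitted; the MAT_REUSE_MATRIX call assumes only the
   numerical values of A have changed):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
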
4555     Level: developer
4556 
4557 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4558 
4559 @*/
4560 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4561 {
4562   PetscErrorCode ierr;
4563   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4564   Mat_SeqAIJ     *mat,*a,*b;
4565   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4566   MatScalar      *aa,*ba,*cam;
4567   PetscScalar    *ca;
4568   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4569   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4570   PetscBool      match;
4571   MPI_Comm       comm;
4572   PetscMPIInt    size;
4573 
4574   PetscFunctionBegin;
4575   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4576   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4578   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4579   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4580 
4581   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4582   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4583   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4584   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4585   aa = a->a; ba = b->a;
4586   if (scall == MAT_INITIAL_MATRIX) {
4587     if (size == 1) {
4588       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4589       PetscFunctionReturn(0);
4590     }
4591 
4592     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4593     ci[0] = 0;
4594     for (i=0; i<am; i++) {
4595       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4596     }
4597     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4598     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4599     k    = 0;
4600     for (i=0; i<am; i++) {
4601       ncols_o = bi[i+1] - bi[i];
4602       ncols_d = ai[i+1] - ai[i];
4603       /* off-diagonal portion of A */
4604       for (jo=0; jo<ncols_o; jo++) {
4605         col = cmap[*bj];
4606         if (col >= cstart) break;
4607         cj[k]   = col; bj++;
4608         ca[k++] = *ba++;
4609       }
4610       /* diagonal portion of A */
4611       for (j=0; j<ncols_d; j++) {
4612         cj[k]   = cstart + *aj++;
4613         ca[k++] = *aa++;
4614       }
4615       /* off-diagonal portion of A */
4616       for (j=jo; j<ncols_o; j++) {
4617         cj[k]   = cmap[*bj++];
4618         ca[k++] = *ba++;
4619       }
4620     }
4621     /* put together the new matrix */
4622     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4623     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4624     /* Since these are PETSc arrays, change flags to free them as necessary. */
4625     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4626     mat->free_a  = PETSC_TRUE;
4627     mat->free_ij = PETSC_TRUE;
4628     mat->nonew   = 0;
4629   } else if (scall == MAT_REUSE_MATRIX) {
4630     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4631     ci = mat->i; cj = mat->j; cam = mat->a;
4632     for (i=0; i<am; i++) {
4633       /* off-diagonal portion of A */
4634       ncols_o = bi[i+1] - bi[i];
4635       for (jo=0; jo<ncols_o; jo++) {
4636         col = cmap[*bj];
4637         if (col >= cstart) break;
4638         *cam++ = *ba++; bj++;
4639       }
4640       /* diagonal portion of A */
4641       ncols_d = ai[i+1] - ai[i];
4642       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4643       /* off-diagonal portion of A */
4644       for (j=jo; j<ncols_o; j++) {
4645         *cam++ = *ba++; bj++;
4646       }
4647     }
4648   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4649   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4650   PetscFunctionReturn(0);
4651 }
4652 
4653 #undef __FUNCT__
4654 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4655 /*@C
4656      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4657 
4658     Not Collective
4659 
4660    Input Parameters:
4661 +    A - the matrix
4662 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4663 -    row, col - index sets of rows and columns to extract (or NULL)
4664 
4665    Output Parameter:
4666 .    A_loc - the local sequential matrix generated
4667 
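   A minimal usage sketch (error checking omitted), extracting all local rows and the locally
   nonzero columns by passing NULL for row and col:
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
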
4668     Level: developer
4669 
4670 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4671 
4672 @*/
4673 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4674 {
4675   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4676   PetscErrorCode ierr;
4677   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4678   IS             isrowa,iscola;
4679   Mat            *aloc;
4680   PetscBool      match;
4681 
4682   PetscFunctionBegin;
4683   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4684   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4685   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4686   if (!row) {
4687     start = A->rmap->rstart; end = A->rmap->rend;
4688     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4689   } else {
4690     isrowa = *row;
4691   }
4692   if (!col) {
4693     start = A->cmap->rstart;
4694     cmap  = a->garray;
4695     nzA   = a->A->cmap->n;
4696     nzB   = a->B->cmap->n;
4697     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4698     ncols = 0;
4699     for (i=0; i<nzB; i++) {
4700       if (cmap[i] < start) idx[ncols++] = cmap[i];
4701       else break;
4702     }
4703     imark = i;
4704     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4705     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4706     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4707   } else {
4708     iscola = *col;
4709   }
4710   if (scall != MAT_INITIAL_MATRIX) {
4711     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4712     aloc[0] = *A_loc;
4713   }
4714   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4715   *A_loc = aloc[0];
4716   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4717   if (!row) {
4718     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4719   }
4720   if (!col) {
4721     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4722   }
4723   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4724   PetscFunctionReturn(0);
4725 }
4726 
4727 #undef __FUNCT__
4728 #define __FUNCT__ "MatGetBrowsOfAcols"
4729 /*@C
4730     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4731 
4732     Collective on Mat
4733 
4734    Input Parameters:
4735 +    A,B - the matrices in mpiaij format
4736 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4737 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4738 
4739    Output Parameters:
4740 +    rowb, colb - index sets of rows and columns of B to extract
4741 -    B_seq - the sequential matrix generated
4742 
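   A minimal usage sketch (error checking omitted; the index sets created by the first call are
   reused, and MAT_REUSE_MATRIX assumes the nonzero pattern of B has not changed):
.vb
     IS  rowb,colb;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... the numerical values of B change ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
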
4743     Level: developer
4744 
4745 @*/
4746 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4747 {
4748   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4749   PetscErrorCode ierr;
4750   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4751   IS             isrowb,iscolb;
4752   Mat            *bseq=NULL;
4753 
4754   PetscFunctionBegin;
4755   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4756     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4757   }
4758   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4759 
4760   if (scall == MAT_INITIAL_MATRIX) {
4761     start = A->cmap->rstart;
4762     cmap  = a->garray;
4763     nzA   = a->A->cmap->n;
4764     nzB   = a->B->cmap->n;
4765     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4766     ncols = 0;
4767     for (i=0; i<nzB; i++) {  /* row < local row index */
4768       if (cmap[i] < start) idx[ncols++] = cmap[i];
4769       else break;
4770     }
4771     imark = i;
4772     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4773     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4774     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4775     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4776   } else {
4777     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4778     isrowb  = *rowb; iscolb = *colb;
4779     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4780     bseq[0] = *B_seq;
4781   }
4782   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4783   *B_seq = bseq[0];
4784   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4785   if (!rowb) {
4786     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4787   } else {
4788     *rowb = isrowb;
4789   }
4790   if (!colb) {
4791     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4792   } else {
4793     *colb = iscolb;
4794   }
4795   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4796   PetscFunctionReturn(0);
4797 }
4798 
4799 #undef __FUNCT__
4800 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4801 /*
4802     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4803     of the OFF-DIAGONAL portion of the local part of A
4804 
4805     Collective on Mat
4806 
4807    Input Parameters:
4808 +    A,B - the matrices in mpiaij format
4809 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4810 
4811    Output Parameters:
4812 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4813 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4814 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4815 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4816 
4817     Level: developer
4818 
4819 */
4820 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4821 {
4822   VecScatter_MPI_General *gen_to,*gen_from;
4823   PetscErrorCode         ierr;
4824   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4825   Mat_SeqAIJ             *b_oth;
4826   VecScatter             ctx =a->Mvctx;
4827   MPI_Comm               comm;
4828   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4829   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4830   PetscScalar            *rvalues,*svalues;
4831   MatScalar              *b_otha,*bufa,*bufA;
4832   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4833   MPI_Request            *rwaits = NULL,*swaits = NULL;
4834   MPI_Status             *sstatus,rstatus;
4835   PetscMPIInt            jj,size;
4836   PetscInt               *cols,sbs,rbs;
4837   PetscScalar            *vals;
4838 
4839   PetscFunctionBegin;
4840   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4841   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4842   if (size == 1) PetscFunctionReturn(0);
4843 
4844   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4845     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4846   }
4847   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4848   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4849 
4850   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4851   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4852   rvalues  = gen_from->values; /* holds the length of receiving row */
4853   svalues  = gen_to->values;   /* holds the length of sending row */
4854   nrecvs   = gen_from->n;
4855   nsends   = gen_to->n;
4856 
4857   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4858   srow    = gen_to->indices;    /* local row index to be sent */
4859   sstarts = gen_to->starts;
4860   sprocs  = gen_to->procs;
4861   sstatus = gen_to->sstatus;
4862   sbs     = gen_to->bs;
4863   rstarts = gen_from->starts;
4864   rprocs  = gen_from->procs;
4865   rbs     = gen_from->bs;
4866 
4867   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4868   if (scall == MAT_INITIAL_MATRIX) {
4869     /* i-array */
4870     /*---------*/
4871     /*  post receives */
4872     for (i=0; i<nrecvs; i++) {
4873       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4874       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4875       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4876     }
4877 
4878     /* pack the outgoing message */
4879     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4880 
4881     sstartsj[0] = 0;
4882     rstartsj[0] = 0;
4883     len         = 0; /* total length of j or a array to be sent */
4884     k           = 0;
4885     for (i=0; i<nsends; i++) {
4886       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4887       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4888       for (j=0; j<nrows; j++) {
4889         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4890         for (l=0; l<sbs; l++) {
4891           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4892 
4893           rowlen[j*sbs+l] = ncols;
4894 
4895           len += ncols;
4896           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4897         }
4898         k++;
4899       }
4900       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4901 
4902       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4903     }
4904     /* recvs and sends of i-array are completed */
4905     i = nrecvs;
4906     while (i--) {
4907       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4908     }
4909     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4910 
4911     /* allocate buffers for sending j and a arrays */
4912     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4913     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4914 
4915     /* create i-array of B_oth */
4916     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4917 
4918     b_othi[0] = 0;
4919     len       = 0; /* total length of j or a array to be received */
4920     k         = 0;
4921     for (i=0; i<nrecvs; i++) {
4922       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4923       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4924       for (j=0; j<nrows; j++) {
4925         b_othi[k+1] = b_othi[k] + rowlen[j];
4926         len        += rowlen[j]; k++;
4927       }
4928       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4929     }
4930 
4931     /* allocate space for j and a arrays of B_oth */
4932     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4933     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4934 
4935     /* j-array */
4936     /*---------*/
4937     /*  post receives of j-array */
4938     for (i=0; i<nrecvs; i++) {
4939       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4940       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4941     }
4942 
4943     /* pack the outgoing message j-array */
4944     k = 0;
4945     for (i=0; i<nsends; i++) {
4946       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4947       bufJ  = bufj+sstartsj[i];
4948       for (j=0; j<nrows; j++) {
4949         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4950         for (ll=0; ll<sbs; ll++) {
4951           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4952           for (l=0; l<ncols; l++) {
4953             *bufJ++ = cols[l];
4954           }
4955           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4956         }
4957       }
4958       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4959     }
4960 
4961     /* recvs and sends of j-array are completed */
4962     i = nrecvs;
4963     while (i--) {
4964       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4965     }
4966     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4967   } else if (scall == MAT_REUSE_MATRIX) {
4968     sstartsj = *startsj_s;
4969     rstartsj = *startsj_r;
4970     bufa     = *bufa_ptr;
4971     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4972     b_otha   = b_oth->a;
4973   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4974 
4975   /* a-array */
4976   /*---------*/
4977   /*  post receives of a-array */
4978   for (i=0; i<nrecvs; i++) {
4979     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4980     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4981   }
4982 
4983   /* pack the outgoing message a-array */
4984   k = 0;
4985   for (i=0; i<nsends; i++) {
4986     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4987     bufA  = bufa+sstartsj[i];
4988     for (j=0; j<nrows; j++) {
4989       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4990       for (ll=0; ll<sbs; ll++) {
4991         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4992         for (l=0; l<ncols; l++) {
4993           *bufA++ = vals[l];
4994         }
4995         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4996       }
4997     }
4998     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4999   }
5000   /* recvs and sends of a-array are completed */
5001   i = nrecvs;
5002   while (i--) {
5003     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5004   }
5005   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5006   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5007 
5008   if (scall == MAT_INITIAL_MATRIX) {
5009     /* put together the new matrix */
5010     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5011 
5012     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5013     /* Since these are PETSc arrays, change flags to free them as necessary. */
5014     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5015     b_oth->free_a  = PETSC_TRUE;
5016     b_oth->free_ij = PETSC_TRUE;
5017     b_oth->nonew   = 0;
5018 
5019     ierr = PetscFree(bufj);CHKERRQ(ierr);
5020     if (!startsj_s || !bufa_ptr) {
5021       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5022       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5023     } else {
5024       *startsj_s = sstartsj;
5025       *startsj_r = rstartsj;
5026       *bufa_ptr  = bufa;
5027     }
5028   }
5029   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5030   PetscFunctionReturn(0);
5031 }
5032 
5033 #undef __FUNCT__
5034 #define __FUNCT__ "MatGetCommunicationStructs"
5035 /*@C
5036   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5037 
5038   Not Collective
5039 
5040   Input Parameter:
5041 . A - The matrix in mpiaij format
5042 
5043   Output Parameters:
5044 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5045 . colmap - A map from global column index to local index into lvec
5046 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5047 
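  A minimal usage sketch (error checking omitted; assumes a build without PETSC_USE_CTABLE, so
  colmap is returned as a PetscInt array):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter scat;
    MatGetCommunicationStructs(A, &lvec, &colmap, &scat);
.ve
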
5048   Level: developer
5049 
5050 @*/
5051 #if defined(PETSC_USE_CTABLE)
5052 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5053 #else
5054 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5055 #endif
5056 {
5057   Mat_MPIAIJ *a;
5058 
5059   PetscFunctionBegin;
5060   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5061   PetscValidPointer(lvec, 2);
5062   PetscValidPointer(colmap, 3);
5063   PetscValidPointer(multScatter, 4);
5064   a = (Mat_MPIAIJ*) A->data;
5065   if (lvec) *lvec = a->lvec;
5066   if (colmap) *colmap = a->colmap;
5067   if (multScatter) *multScatter = a->Mvctx;
5068   PetscFunctionReturn(0);
5069 }
5070 
5071 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5072 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5073 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5074 #if defined(PETSC_HAVE_ELEMENTAL)
5075 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5076 #endif
5077 
5078 #undef __FUNCT__
5079 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5080 /*
5081     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5082 
5083                n                       p                          p
5084         (              )       (              )         (                  )
5085       m (      A       )  *  n (       B      )   =   m (         C        )
5086         (              )       (              )         (                  )
5087 
5088 */
5089 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5090 {
5091   PetscErrorCode ierr;
5092   Mat            At,Bt,Ct;
5093 
5094   PetscFunctionBegin;
5095   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5096   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5097   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5098   ierr = MatDestroy(&At);CHKERRQ(ierr);
5099   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5100   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5101   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 #undef __FUNCT__
5106 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5107 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5108 {
5109   PetscErrorCode ierr;
5110   PetscInt       m=A->rmap->n,n=B->cmap->n;
5111   Mat            Cmat;
5112 
5113   PetscFunctionBegin;
5114   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5115   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5116   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5117   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5118   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5119   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5120   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5121   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5122 
5123   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5124 
5125   *C = Cmat;
5126   PetscFunctionReturn(0);
5127 }
5128 
5129 /* ----------------------------------------------------------------*/
5130 #undef __FUNCT__
5131 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5132 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5133 {
5134   PetscErrorCode ierr;
5135 
5136   PetscFunctionBegin;
5137   if (scall == MAT_INITIAL_MATRIX) {
5138     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5139     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5140     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5141   }
5142   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5143   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5144   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5145   PetscFunctionReturn(0);
5146 }
5147 
5148 #if defined(PETSC_HAVE_MUMPS)
5149 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5150 #endif
5151 #if defined(PETSC_HAVE_PASTIX)
5152 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5153 #endif
5154 #if defined(PETSC_HAVE_SUPERLU_DIST)
5155 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5156 #endif
5157 #if defined(PETSC_HAVE_CLIQUE)
5158 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5159 #endif
5160 
5161 /*MC
5162    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5163 
5164    Options Database Keys:
5165 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5166 
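   A typical creation sequence (a minimal sketch with error checking omitted; comm, m, n, M, N, d_nz, and o_nz
   are application-dependent placeholders, and MatSetType() may be replaced by MatSetFromOptions() together
   with -mat_type mpiaij):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
   followed by MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd(), or use the convenience routine MatCreateAIJ().
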
5167   Level: beginner
5168 
5169 .seealso: MatCreateAIJ()
5170 M*/
5171 
5172 #undef __FUNCT__
5173 #define __FUNCT__ "MatCreate_MPIAIJ"
5174 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5175 {
5176   Mat_MPIAIJ     *b;
5177   PetscErrorCode ierr;
5178   PetscMPIInt    size;
5179 
5180   PetscFunctionBegin;
5181   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5182 
5183   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5184   B->data       = (void*)b;
5185   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5186   B->assembled  = PETSC_FALSE;
5187   B->insertmode = NOT_SET_VALUES;
5188   b->size       = size;
5189 
5190   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5191 
5192   /* build cache for off array entries formed */
5193   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5194 
5195   b->donotstash  = PETSC_FALSE;
5196   b->colmap      = 0;
5197   b->garray      = 0;
5198   b->roworiented = PETSC_TRUE;
5199 
5200   /* stuff used for matrix vector multiply */
5201   b->lvec  = NULL;
5202   b->Mvctx = NULL;
5203 
5204   /* stuff for MatGetRow() */
5205   b->rowindices   = 0;
5206   b->rowvalues    = 0;
5207   b->getrowactive = PETSC_FALSE;
5208 
5209   /* flexible pointer used in CUSP/CUSPARSE classes */
5210   b->spptr = NULL;
5211 
5212 #if defined(PETSC_HAVE_MUMPS)
5213   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5214 #endif
5215 #if defined(PETSC_HAVE_PASTIX)
5216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5217 #endif
5218 #if defined(PETSC_HAVE_SUPERLU_DIST)
5219   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5220 #endif
5221 #if defined(PETSC_HAVE_CLIQUE)
5222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5223 #endif
5224   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5225   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5226   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5227   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5228   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5229   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5230   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5231   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5234 #if defined(PETSC_HAVE_ELEMENTAL)
5235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5236 #endif
5237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5240   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5241   PetscFunctionReturn(0);
5242 }
5243 
5244 #undef __FUNCT__
5245 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5246 /*@
5247      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5248          and "off-diagonal" part of the matrix in CSR format.
5249 
5250    Collective on MPI_Comm
5251 
5252    Input Parameters:
5253 +  comm - MPI communicator
5254 .  m - number of local rows (Cannot be PETSC_DECIDE)
5255 .  n - number of local columns; this should be the same as the local size used in creating the
5256        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5257        calculated if N is given). For square matrices n is almost always m.
5258 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5259 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5260 .  i - row indices for "diagonal" portion of matrix
5261 .  j - column indices for "diagonal" portion of matrix
5262 .  a - matrix values for "diagonal" portion of matrix
5263 .  oi - row indices for "off-diagonal" portion of matrix
5264 .  oj - column indices for "off-diagonal" portion of matrix
5265 -  oa - matrix values for "off-diagonal" portion of matrix
5266 
5267    Output Parameter:
5268 .   mat - the matrix
5269 
5270    Level: advanced
5271 
5272    Notes:
5273        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5274        must free these arrays once the matrix has been destroyed and not before.
5275 
5276        The i and j indices (and likewise oi and oj) are 0 based
5277 
5278        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5279 
5280        This sets local rows and cannot be used to set off-processor values.
5281 
5282        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5283        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5284        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5285        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5286        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5287        communication if it is known that only local entries will be set.
5288 
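   Example layout (an illustrative sketch): consider the 4x4 matrix below, distributed over two processes,
   each owning two rows and two columns (m = n = 2, M = N = 4). The j column indices are local to the
   "diagonal" block (the columns owned by this process), while the oj column indices are global, matching
   how the two blocks are stored internally:
.vb
          1 0 | 2 0
          0 3 | 0 4
          ----+----
          5 0 | 6 0
          0 7 | 0 8

     rank 0:  i = {0,1,2}  j = {0,1}  a = {1,3}    oi = {0,1,2}  oj = {2,3}  oa = {2,4}
     rank 1:  i = {0,1,2}  j = {0,1}  a = {6,8}    oi = {0,1,2}  oj = {0,1}  oa = {5,7}
.ve
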
5289 .keywords: matrix, aij, compressed row, sparse, parallel
5290 
5291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5292           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5293 @*/
5294 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5295 {
5296   PetscErrorCode ierr;
5297   Mat_MPIAIJ     *maij;
5298 
5299   PetscFunctionBegin;
5300   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5301   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5302   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5303   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5304   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5305   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5306   maij = (Mat_MPIAIJ*) (*mat)->data;
5307 
5308   (*mat)->preallocated = PETSC_TRUE;
5309 
5310   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5311   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5312 
5313   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5314   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5315 
5316   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5317   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5318   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5319   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5320 
5321   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5322   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5323   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5324   PetscFunctionReturn(0);
5325 }
5326 
5327 /*
5328     Special version for direct calls from Fortran
5329 */
5330 #include <petsc-private/fortranimpl.h>
5331 
5332 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5333 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5334 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5335 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5336 #endif
5337 
5338 /* Change these macros so they can be used in a void function (errors abort via CHKERRABORT instead of returning an error code) */
5339 #undef CHKERRQ
5340 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5341 #undef SETERRQ2
5342 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5343 #undef SETERRQ3
5344 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5345 #undef SETERRQ
5346 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5347 
5348 #undef __FUNCT__
5349 #define __FUNCT__ "matsetvaluesmpiaij_"
5350 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5351 {
5352   Mat            mat  = *mmat;
5353   PetscInt       m    = *mm, n = *mn;
5354   InsertMode     addv = *maddv;
5355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5356   PetscScalar    value;
5357   PetscErrorCode ierr;
5358 
5359   MatCheckPreallocated(mat,1);
5360   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5361 
5362 #if defined(PETSC_USE_DEBUG)
5363   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5364 #endif
5365   {
5366     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5367     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5368     PetscBool roworiented = aij->roworiented;
5369 
5370     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5371     Mat        A                 = aij->A;
5372     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5373     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5374     MatScalar  *aa               = a->a;
5375     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5376     Mat        B                 = aij->B;
5377     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5378     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5379     MatScalar  *ba               = b->a;
5380 
5381     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5382     PetscInt  nonew = a->nonew;
5383     MatScalar *ap1,*ap2;
5384 
5385     PetscFunctionBegin;
5386     for (i=0; i<m; i++) {
5387       if (im[i] < 0) continue;
5388 #if defined(PETSC_USE_DEBUG)
5389       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5390 #endif
5391       if (im[i] >= rstart && im[i] < rend) {
5392         row      = im[i] - rstart;
5393         lastcol1 = -1;
5394         rp1      = aj + ai[row];
5395         ap1      = aa + ai[row];
5396         rmax1    = aimax[row];
5397         nrow1    = ailen[row];
5398         low1     = 0;
5399         high1    = nrow1;
5400         lastcol2 = -1;
5401         rp2      = bj + bi[row];
5402         ap2      = ba + bi[row];
5403         rmax2    = bimax[row];
5404         nrow2    = bilen[row];
5405         low2     = 0;
5406         high2    = nrow2;
5407 
5408         for (j=0; j<n; j++) {
5409           if (roworiented) value = v[i*n+j];
5410           else value = v[i+j*m];
5411           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5412           if (in[j] >= cstart && in[j] < cend) {
5413             col = in[j] - cstart;
5414             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5415           } else if (in[j] < 0) continue;
5416 #if defined(PETSC_USE_DEBUG)
5417           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5418 #endif
5419           else {
5420             if (mat->was_assembled) {
5421               if (!aij->colmap) {
5422                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5423               }
5424 #if defined(PETSC_USE_CTABLE)
5425               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5426               col--;
5427 #else
5428               col = aij->colmap[in[j]] - 1;
5429 #endif
5430               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5431                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5432                 col  =  in[j];
5433                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5434                 B     = aij->B;
5435                 b     = (Mat_SeqAIJ*)B->data;
5436                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5437                 ba    = b->a;
5438                 rp2   = bj + bi[row];
5439                 ap2   = ba + bi[row];
5440                 rmax2 = bimax[row];
5441                 nrow2 = bilen[row];
5442                 low2  = 0;
5443                 high2 = nrow2;
5444                 bm    = aij->B->rmap->n;
5445               }
5446             } else col = in[j];
5447             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5448           }
5449         }
5450       } else if (!aij->donotstash) {
5451         if (roworiented) {
5452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5453         } else {
5454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5455         }
5456       }
5457     }
5458   }
5459   PetscFunctionReturnVoid();
5460 }
5461 
5462