xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f66eea085cceefc5191b4b3d61f096e7c3d8e689)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
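
/*
   Editorial usage sketch (not part of the library source): creating an AIJ matrix and calling
   both preallocation routines, as recommended in the MATAIJ notes above.  The global size 100
   and the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are assumptions of this
   example, and ierr is a PetscErrorCode declared by the caller.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          takes effect on a single process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   takes effect on multiple processes

   Only the routine matching the communicator size has any effect; the other is a no-op, which is
   why calling both is safe and convenient.
*/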
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the number of diagonal and off-diagonal entries in each row */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the number of diagonal and off-diagonal entries in each row */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0 */
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
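
/*
   Editorial usage sketch (not part of the library source) for MatDistribute_MPIAIJ() above,
   under the following assumptions: every rank passes a valid MATSEQAIJ gmat (only the copy on
   rank 0 supplies the data), the matrix is square, and mloc is the number of rows this rank is
   to own (the per-rank mloc values must sum to the row count of gmat).

      Mat      gmat,dmat;
      PetscInt mloc;
      create gmat as MATSEQAIJ on every rank and fill it on rank 0 only
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mloc,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      later, after changing only the numerical values of gmat on rank 0:
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mloc,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/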
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364   number to the local number in the off-diagonal part of the local
365   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
366   at a slightly higher hash-table cost; without it, it is not scalable (each
367   process stores an order-N integer array) but is fast to access.
368 */
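
/*
   Editorial illustration (not part of the library source): how the colmap built below is used
   to translate a global column number gcol into a local column index of the off-diagonal block
   B, mirroring the lookup done in MatSetValues_MPIAIJ() later in this file; a negative result
   means the column does not occur in B on this process.  The variables aij, gcol, lcol, and
   ierr are assumed to be in scope.

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/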
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
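/*
   The following two macros insert or add a single value into one row of the diagonal (A) or
   off-diagonal (B) sequential block.  Each does a short binary search followed by a linear scan
   to locate the column; if the column is present the value is set or added, otherwise a new
   nonzero is inserted (reallocating the row if needed, subject to the nonew setting) and the
   later entries of the row are shifted up, falling through to the *_noinsert label when done.
*/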
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled; if so, we must
707      also disassemble ourselves, in order that we may reassemble. */
708   /*
709      if the nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled, thus we can skip this step
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
730   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
731 
732   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
733   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
734     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
735     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
743 {
744   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
749   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_MPIAIJ"
755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
756 {
757   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
758   PetscInt      *owners = A->rmap->range;
759   PetscInt       n      = A->rmap->n;
760   PetscSF        sf;
761   PetscInt      *lrows;
762   PetscSFNode   *rrows;
763   PetscInt       r, p = 0, len = 0;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   /* Create SF where leaves are input rows and roots are owned rows */
768   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
769   for (r = 0; r < n; ++r) lrows[r] = -1;
770   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
771   for (r = 0; r < N; ++r) {
772     const PetscInt idx   = rows[r];
773     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
774     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
775       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
776     }
777     if (A->nooffproczerorows) {
778       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
779       lrows[len++] = idx - owners[p];
780     } else {
781       rrows[r].rank = p;
782       rrows[r].index = rows[r] - owners[p];
783     }
784   }
785   if (!A->nooffproczerorows) {
786     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
787     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
788     /* Collect flags for rows to be zeroed */
789     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
790     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
791     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
792     /* Compress and put in row numbers */
793     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
794   }
795   /* fix right hand side if needed */
796   if (x && b) {
797     const PetscScalar *xx;
798     PetscScalar       *bb;
799 
800     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
801     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
802     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
803     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
804     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
805   }
806   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
807   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
809     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
810   } else if (diag != 0.0) {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
813     for (r = 0; r < len; ++r) {
814       const PetscInt row = lrows[r] + A->rmap->rstart;
815       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819   } else {
820     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(lrows);CHKERRQ(ierr);
823 
824   /* only change matrix nonzero state if pattern was allowed to be changed */
825   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
826     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
827     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
835 {
836   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
837   PetscErrorCode    ierr;
838   PetscMPIInt       n = A->rmap->n;
839   PetscInt          i,j,r,m,p = 0,len = 0;
840   PetscInt          *lrows,*owners = A->rmap->range;
841   PetscSFNode       *rrows;
842   PetscSF           sf;
843   const PetscScalar *xx;
844   PetscScalar       *bb,*mask;
845   Vec               xmask,lmask;
846   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
847   const PetscInt    *aj, *ii,*ridx;
848   PetscScalar       *aa;
849 
850   PetscFunctionBegin;
851   /* Create SF where leaves are input rows and roots are owned rows */
852   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
853   for (r = 0; r < n; ++r) lrows[r] = -1;
854   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
855   for (r = 0; r < N; ++r) {
856     const PetscInt idx   = rows[r];
857     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
858     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
859       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
860     }
861     rrows[r].rank  = p;
862     rrows[r].index = rows[r] - owners[p];
863   }
864   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
865   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
866   /* Collect flags for rows to be zeroed */
867   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
868   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
870   /* Compress and put in row numbers */
871   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
872   /* zero diagonal part of matrix */
873   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
874   /* handle off diagonal part of matrix */
875   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
876   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
877   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
878   for (i=0; i<len; i++) bb[lrows[i]] = 1;
879   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
880   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
883   if (x) {
884     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
887     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
888   }
889   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
890   /* remove zeroed rows of off diagonal matrix */
891   ii = aij->i;
892   for (i=0; i<len; i++) {
893     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
894   }
895   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
896   if (aij->compressedrow.use) {
897     m    = aij->compressedrow.nrows;
898     ii   = aij->compressedrow.i;
899     ridx = aij->compressedrow.rindex;
900     for (i=0; i<m; i++) {
901       n  = ii[i+1] - ii[i];
902       aj = aij->j + ii[i];
903       aa = aij->a + ii[i];
904 
905       for (j=0; j<n; j++) {
906         if (PetscAbsScalar(mask[*aj])) {
907           if (b) bb[*ridx] -= *aa*xx[*aj];
908           *aa = 0.0;
909         }
910         aa++;
911         aj++;
912       }
913       ridx++;
914     }
915   } else { /* do not use compressed row format */
916     m = l->B->rmap->n;
917     for (i=0; i<m; i++) {
918       n  = ii[i+1] - ii[i];
919       aj = aij->j + ii[i];
920       aa = aij->a + ii[i];
921       for (j=0; j<n; j++) {
922         if (PetscAbsScalar(mask[*aj])) {
923           if (b) bb[i] -= *aa*xx[*aj];
924           *aa = 0.0;
925         }
926         aa++;
927         aj++;
928       }
929     }
930   }
931   if (x) {
932     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
933     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
934   }
935   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
936   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
937   ierr = PetscFree(lrows);CHKERRQ(ierr);
938 
939   /* only change matrix nonzero state if pattern was allowed to be changed */
940   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
941     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
942     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMult_MPIAIJ"
949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       nt;
954 
955   PetscFunctionBegin;
956   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
957   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
958   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
960   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatMultAdd_MPIAIJ"
979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   PetscBool      merged;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   if (!merged) {
1005     /* send it on its way */
1006     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1007     /* do local part */
1008     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1009     /* receive remote parts: note this assumes the values are not actually */
1010     /* added into yy until the VecScatterEnd() on the next line */
1011     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1012   } else {
1013     /* do local part */
1014     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1015     /* send it on its way */
1016     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017     /* values actually were received in the Begin() but we need to call this nop */
1018     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1025 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1026 {
1027   MPI_Comm       comm;
1028   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1029   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1030   IS             Me,Notme;
1031   PetscErrorCode ierr;
1032   PetscInt       M,N,first,last,*notme,i;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: check that the diagonal blocks are transposes of each other */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1039   if (!*f) PetscFunctionReturn(0);
1040   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1042   if (size == 1) PetscFunctionReturn(0);
1043 
1044   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1045   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1046   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1047   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1048   for (i=0; i<first; i++) notme[i] = i;
1049   for (i=last; i<M; i++) notme[i-last+first] = i;
1050   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1051   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1052   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1053   Aoff = Aoffs[0];
1054   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1055   Boff = Boffs[0];
1056   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1057   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1059   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1061   ierr = PetscFree(notme);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   /* send it on its way */
1076   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   /* do local part */
1078   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1079   /* receive remote parts */
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /*
1085   This only works correctly for square matrices where the subblock A->A is the
1086    diagonal block
1087 */
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1091 {
1092   PetscErrorCode ierr;
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094 
1095   PetscFunctionBegin;
1096   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1097   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1098   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatScale_MPIAIJ"
1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1111   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatDestroy_MPIAIJ"
1117 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1118 {
1119   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123 #if defined(PETSC_USE_LOG)
1124   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1125 #endif
1126   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1127   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1128   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1130 #if defined(PETSC_USE_CTABLE)
1131   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1132 #else
1133   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1134 #endif
1135   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1136   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1137   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1138   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1139   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1140   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1141 
1142   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1159 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1160 {
1161   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1162   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1163   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1164   PetscErrorCode ierr;
1165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1166   int            fd;
1167   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1168   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1169   PetscScalar    *column_values;
1170   PetscInt       message_count,flowcontrolcount;
1171   FILE           *file;
1172 
1173   PetscFunctionBegin;
1174   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1175   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1176   nz   = A->nz + B->nz;
1177   if (!rank) {
1178     header[0] = MAT_FILE_CLASSID;
1179     header[1] = mat->rmap->N;
1180     header[2] = mat->cmap->N;
1181 
1182     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1184     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     /* get largest number of rows any processor has */
1186     rlen  = mat->rmap->n;
1187     range = mat->rmap->range;
1188     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1189   } else {
1190     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     rlen = mat->rmap->n;
1192   }
1193 
1194   /* load up the local row counts */
1195   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1196   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1197 
1198   /* store the row lengths to the file */
1199   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1200   if (!rank) {
1201     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     for (i=1; i<size; i++) {
1203       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1204       rlen = range[i+1] - range[i];
1205       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1206       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1207     }
1208     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1209   } else {
1210     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1211     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1213   }
1214   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1215 
1216   /* load up the local column indices */
1217   nzmax = nz; /* process 0 needs buffer space as large as the largest processor needs */
1218   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1220   cnt   = 0;
1221   for (i=0; i<mat->rmap->n; i++) {
1222     for (j=B->i[i]; j<B->i[i+1]; j++) {
1223       if ((col = garray[B->j[j]]) > cstart) break;
1224       column_indices[cnt++] = col;
1225     }
1226     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1227     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1228   }
1229   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1230 
1231   /* store the column indices to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1239       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1240       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1251 
1252   /* load up the local column values */
1253   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1254   cnt  = 0;
1255   for (i=0; i<mat->rmap->n; i++) {
1256     for (j=B->i[i]; j<B->i[i+1]; j++) {
1257       if (garray[B->j[j]] > cstart) break;
1258       column_values[cnt++] = B->a[j];
1259     }
1260     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1261     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1262   }
1263   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1264 
1265   /* store the column values to the file */
1266   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1267   if (!rank) {
1268     MPI_Status status;
1269     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1270     for (i=1; i<size; i++) {
1271       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1272       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1273       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1274       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1288   PetscFunctionReturn(0);
1289 }
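
/*
   Illustrative sketch (not part of the original source): how a user reaches the binary
   writer above through the public API.  The helper name ExampleSaveMPIAIJ and the file
   name "matrix.dat" are placeholders.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSaveMPIAIJ"
static PetscErrorCode ExampleSaveMPIAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  /* open a binary viewer on the matrix communicator; MatView() dispatches to the parallel binary writer when size > 1 */
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}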
1290 
1291 #include <petscdraw.h>
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1294 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1295 {
1296   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1297   PetscErrorCode    ierr;
1298   PetscMPIInt       rank = aij->rank,size = aij->size;
1299   PetscBool         isdraw,iascii,isbinary;
1300   PetscViewer       sviewer;
1301   PetscViewerFormat format;
1302 
1303   PetscFunctionBegin;
1304   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1305   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1306   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1307   if (iascii) {
1308     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscBool inodes;
1312 
1313       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1314       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1315       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1317       if (!inodes) {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1323       }
1324       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1331       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1332       PetscFunctionReturn(0);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount,inodelimit,*inodes;
1335       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1336       if (inodes) {
1337         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1340       }
1341       PetscFunctionReturn(0);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(0);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1348       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1351     }
1352     PetscFunctionReturn(0);
1353   } else if (isdraw) {
1354     PetscDraw draw;
1355     PetscBool isnull;
1356     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1357     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1358   }
1359 
1360   {
1361     /* assemble the entire matrix onto first processor. */
1362     Mat        A;
1363     Mat_SeqAIJ *Aloc;
1364     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1365     MatScalar  *a;
1366 
1367     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1368     if (!rank) {
1369       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1370     } else {
1371       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1372     }
1373     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1374     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1375     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1376     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1377     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1378 
1379     /* copy over the A part */
1380     Aloc = (Mat_SeqAIJ*)aij->A->data;
1381     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1382     row  = mat->rmap->rstart;
1383     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1384     for (i=0; i<m; i++) {
1385       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1386       row++;
1387       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1388     }
1389     aj = Aloc->j;
1390     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1391 
1392     /* copy over the B part */
1393     Aloc = (Mat_SeqAIJ*)aij->B->data;
1394     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1395     row  = mat->rmap->rstart;
1396     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1397     ct   = cols;
1398     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1399     for (i=0; i<m; i++) {
1400       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1401       row++;
1402       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1403     }
1404     ierr = PetscFree(ct);CHKERRQ(ierr);
1405     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407     /*
1408        Every process has to participate in drawing the matrix since the graphics waits are
1409        synchronized across all processes that share the PetscDraw object
1410     */
1411     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1412     if (!rank) {
1413       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1414     }
1415     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1416     ierr = MatDestroy(&A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_MPIAIJ"
1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1424 {
1425   PetscErrorCode ierr;
1426   PetscBool      iascii,isdraw,issocket,isbinary;
1427 
1428   PetscFunctionBegin;
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1433   if (iascii || isdraw || isbinary || issocket) {
1434     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatSOR_MPIAIJ"
1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1442 {
1443   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1444   PetscErrorCode ierr;
1445   Vec            bb1 = 0;
1446   PetscBool      hasop;
1447 
1448   PetscFunctionBegin;
1449   if (flag == SOR_APPLY_UPPER) {
1450     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451     PetscFunctionReturn(0);
1452   }
1453 
1454   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1455     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1456   }
1457 
1458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463 
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1478       its--;
1479     }
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_EISENSTAT) {
1508     Vec xx1;
1509 
1510     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1512 
1513     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     if (!mat->diag) {
1516       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1517       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1518     }
1519     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1520     if (hasop) {
1521       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1522     } else {
1523       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1524     }
1525     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1526 
1527     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1528 
1529     /* local sweep */
1530     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1531     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1532     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1533   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1534 
1535   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
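
/*
   Illustrative sketch (not part of the original source): calling MatSOR() with one of the
   local-sweep flags handled above.  ExampleLocalSOR and the parameter values are placeholders.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleLocalSOR"
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two outer iterations: each scatters x, forms bb1 = b - B*x and sweeps the local diagonal block symmetrically */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}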
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatPermute_MPIAIJ"
1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1542 {
1543   Mat            aA,aB,Aperm;
1544   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1545   PetscScalar    *aa,*ba;
1546   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1547   PetscSF        rowsf,sf;
1548   IS             parcolp = NULL;
1549   PetscBool      done;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1554   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1557 
1558   /* Invert row permutation to find out where my rows should go */
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1563   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565 
1566   /* Invert column permutation to find out where my columns should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1570   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1571   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1574 
1575   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1576   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1577   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1578 
1579   /* Find out where my gcols should go */
1580   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1581   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1588 
1589   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt row = rdest[i],rowner;
1594     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1595     for (j=ai[i]; j<ai[i+1]; j++) {
1596       PetscInt cowner,col = cdest[aj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601     for (j=bi[i]; j<bi[i+1]; j++) {
1602       PetscInt cowner,col = gcdest[bj[j]];
1603       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1604       if (rowner == cowner) dnnz[i]++;
1605       else onnz[i]++;
1606     }
1607   }
1608   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1613 
1614   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1617   for (i=0; i<m; i++) {
1618     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1619     PetscInt j0,rowlen;
1620     rowlen = ai[i+1] - ai[i];
1621     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m, so insert in batches of at most m entries */
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625     rowlen = bi[i+1] - bi[i];
1626     for (j0=j=0; j<rowlen; j0=j) {
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1637   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1639   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1640   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1641   *B = Aperm;
1642   PetscFunctionReturn(0);
1643 }
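
/*
   Illustrative sketch (not part of the original source): building row and column permutation
   index sets and calling MatPermute().  An identity permutation is used for brevity;
   ExamplePermuteIdentity is a placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePermuteIdentity"
static PetscErrorCode ExamplePermuteIdentity(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  PetscInt       m,n,rstart,cstart;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
  /* rowp[i] is the global destination of local row i; here every row and column keeps its place */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}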
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1648 {
1649   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1650   Mat            A    = mat->A,B = mat->B;
1651   PetscErrorCode ierr;
1652   PetscReal      isend[5],irecv[5];
1653 
1654   PetscFunctionBegin;
1655   info->block_size = 1.0;
1656   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1659   isend[3] = info->memory;  isend[4] = info->mallocs;
1660 
1661   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1664   isend[3] += info->memory;  isend[4] += info->mallocs;
1665   if (flag == MAT_LOCAL) {
1666     info->nz_used      = isend[0];
1667     info->nz_allocated = isend[1];
1668     info->nz_unneeded  = isend[2];
1669     info->memory       = isend[3];
1670     info->mallocs      = isend[4];
1671   } else if (flag == MAT_GLOBAL_MAX) {
1672     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   } else if (flag == MAT_GLOBAL_SUM) {
1680     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1681 
1682     info->nz_used      = irecv[0];
1683     info->nz_allocated = irecv[1];
1684     info->nz_unneeded  = irecv[2];
1685     info->memory       = irecv[3];
1686     info->mallocs      = irecv[4];
1687   }
1688   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1689   info->fill_ratio_needed = 0;
1690   info->factor_mallocs    = 0;
1691   PetscFunctionReturn(0);
1692 }
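
/*
   Illustrative sketch (not part of the original source): querying the globally summed
   nonzero counts that MatGetInfo_MPIAIJ() reduces over the communicator.
   ExamplePrintInfo is a placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePrintInfo"
static PetscErrorCode ExamplePrintInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nonzeros used %g allocated %g mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}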
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatSetOption_MPIAIJ"
1696 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1697 {
1698   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   switch (op) {
1703   case MAT_NEW_NONZERO_LOCATIONS:
1704   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1705   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1706   case MAT_KEEP_NONZERO_PATTERN:
1707   case MAT_NEW_NONZERO_LOCATION_ERR:
1708   case MAT_USE_INODES:
1709   case MAT_IGNORE_ZERO_ENTRIES:
1710     MatCheckPreallocated(A,1);
1711     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1712     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_ROW_ORIENTED:
1715     a->roworiented = flg;
1716 
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1719     break;
1720   case MAT_NEW_DIAGONALS:
1721     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1722     break;
1723   case MAT_IGNORE_OFF_PROC_ENTRIES:
1724     a->donotstash = flg;
1725     break;
1726   case MAT_SPD:
1727     A->spd_set = PETSC_TRUE;
1728     A->spd     = flg;
1729     if (flg) {
1730       A->symmetric                  = PETSC_TRUE;
1731       A->structurally_symmetric     = PETSC_TRUE;
1732       A->symmetric_set              = PETSC_TRUE;
1733       A->structurally_symmetric_set = PETSC_TRUE;
1734     }
1735     break;
1736   case MAT_SYMMETRIC:
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_STRUCTURALLY_SYMMETRIC:
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     break;
1742   case MAT_HERMITIAN:
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_SYMMETRY_ETERNAL:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   default:
1749     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatGetRow_MPIAIJ"
1756 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1757 {
1758   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1759   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1760   PetscErrorCode ierr;
1761   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1762   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1763   PetscInt       *cmap,*idx_p;
1764 
1765   PetscFunctionBegin;
1766   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1767   mat->getrowactive = PETSC_TRUE;
1768 
1769   if (!mat->rowvalues && (idx || v)) {
1770     /*
1771         allocate enough space to hold information from the longest row.
1772     */
1773     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1774     PetscInt   max = 1,tmp;
1775     for (i=0; i<matin->rmap->n; i++) {
1776       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1777       if (max < tmp) max = tmp;
1778     }
1779     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1780   }
1781 
1782   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1783   lrow = row - rstart;
1784 
1785   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1786   if (!v)   {pvA = 0; pvB = 0;}
1787   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1788   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1789   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1790   nztot = nzA + nzB;
1791 
1792   cmap = mat->garray;
1793   if (v  || idx) {
1794     if (nztot) {
1795       /* Sort by increasing column numbers, assuming A and B already sorted */
1796       PetscInt imark = -1;
1797       if (v) {
1798         *v = v_p = mat->rowvalues;
1799         for (i=0; i<nzB; i++) {
1800           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1801           else break;
1802         }
1803         imark = i;
1804         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1805         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1806       }
1807       if (idx) {
1808         *idx = idx_p = mat->rowindices;
1809         if (imark > -1) {
1810           for (i=0; i<imark; i++) {
1811             idx_p[i] = cmap[cworkB[i]];
1812           }
1813         } else {
1814           for (i=0; i<nzB; i++) {
1815             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1816             else break;
1817           }
1818           imark = i;
1819         }
1820         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1821         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1822       }
1823     } else {
1824       if (idx) *idx = 0;
1825       if (v)   *v   = 0;
1826     }
1827   }
1828   *nz  = nztot;
1829   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1830   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1836 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1839 
1840   PetscFunctionBegin;
1841   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1842   aij->getrowactive = PETSC_FALSE;
1843   PetscFunctionReturn(0);
1844 }
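
/*
   Illustrative sketch (not part of the original source): the MatGetRow()/MatRestoreRow()
   usage pattern implemented by the two routines above; only locally owned rows may be
   requested, and each row must be restored before the next one is fetched.
   ExampleRowSums is a placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleRowSums"
static PetscErrorCode ExampleRowSums(Mat A,PetscScalar *total)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  *total = 0.0;
  ierr   = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<ncols; j++) *total += vals[j];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}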
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatNorm_MPIAIJ"
1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1849 {
1850   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1851   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1852   PetscErrorCode ierr;
1853   PetscInt       i,j,cstart = mat->cmap->rstart;
1854   PetscReal      sum = 0.0;
1855   MatScalar      *v;
1856 
1857   PetscFunctionBegin;
1858   if (aij->size == 1) {
1859     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1860   } else {
1861     if (type == NORM_FROBENIUS) {
1862       v = amat->a;
1863       for (i=0; i<amat->nz; i++) {
1864         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1865       }
1866       v = bmat->a;
1867       for (i=0; i<bmat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1871       *norm = PetscSqrtReal(*norm);
1872     } else if (type == NORM_1) { /* max column norm */
1873       PetscReal *tmp,*tmp2;
1874       PetscInt  *jj,*garray = aij->garray;
1875       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1876       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1877       *norm = 0.0;
1878       v     = amat->a; jj = amat->j;
1879       for (j=0; j<amat->nz; j++) {
1880         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1881       }
1882       v = bmat->a; jj = bmat->j;
1883       for (j=0; j<bmat->nz; j++) {
1884         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1885       }
1886       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       for (j=0; j<mat->cmap->N; j++) {
1888         if (tmp2[j] > *norm) *norm = tmp2[j];
1889       }
1890       ierr = PetscFree(tmp);CHKERRQ(ierr);
1891       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1892     } else if (type == NORM_INFINITY) { /* max row norm */
1893       PetscReal ntemp = 0.0;
1894       for (j=0; j<aij->A->rmap->n; j++) {
1895         v   = amat->a + amat->i[j];
1896         sum = 0.0;
1897         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1898           sum += PetscAbsScalar(*v); v++;
1899         }
1900         v = bmat->a + bmat->i[j];
1901         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1902           sum += PetscAbsScalar(*v); v++;
1903         }
1904         if (sum > ntemp) ntemp = sum;
1905       }
1906       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1908   }
1909   PetscFunctionReturn(0);
1910 }
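
/*
   Illustrative sketch (not part of the original source): the three norm types supported by
   MatNorm_MPIAIJ() above; NORM_2 is rejected with PETSC_ERR_SUP.  ExampleNorms is a
   placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleNorms"
static PetscErrorCode ExampleNorms(Mat A,PetscReal *fro,PetscReal *one,PetscReal *inf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,fro);CHKERRQ(ierr); /* sqrt of the global sum of |a_ij|^2 */
  ierr = MatNorm(A,NORM_1,one);CHKERRQ(ierr);         /* maximum column sum of |a_ij| */
  ierr = MatNorm(A,NORM_INFINITY,inf);CHKERRQ(ierr);  /* maximum row sum of |a_ij| */
  PetscFunctionReturn(0);
}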
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatTranspose_MPIAIJ"
1914 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1915 {
1916   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1917   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1918   PetscErrorCode ierr;
1919   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1920   PetscInt       cstart = A->cmap->rstart,ncol;
1921   Mat            B;
1922   MatScalar      *array;
1923 
1924   PetscFunctionBegin;
1925   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1926 
1927   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1928   ai = Aloc->i; aj = Aloc->j;
1929   bi = Bloc->i; bj = Bloc->j;
1930   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1931     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1932     PetscSFNode          *oloc;
1933     PETSC_UNUSED PetscSF sf;
1934 
1935     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1936     /* compute d_nnz for preallocation */
1937     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<ai[ma]; i++) {
1939       d_nnz[aj[i]]++;
1940       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1941     }
1942     /* compute local off-diagonal contributions */
1943     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1944     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1945     /* map those to global */
1946     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1947     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1948     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1949     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1950     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1951     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1952     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1953 
1954     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1955     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1956     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1957     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1958     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1959     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1960   } else {
1961     B    = *matout;
1962     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1963     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964   }
1965 
1966   /* copy over the A part */
1967   array = Aloc->a;
1968   row   = A->rmap->rstart;
1969   for (i=0; i<ma; i++) {
1970     ncol = ai[i+1]-ai[i];
1971     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1972     row++;
1973     array += ncol; aj += ncol;
1974   }
1975   aj = Aloc->j;
1976   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1977 
1978   /* copy over the B part */
1979   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1980   array = Bloc->a;
1981   row   = A->rmap->rstart;
1982   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1983   cols_tmp = cols;
1984   for (i=0; i<mb; i++) {
1985     ncol = bi[i+1]-bi[i];
1986     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1987     row++;
1988     array += ncol; cols_tmp += ncol;
1989   }
1990   ierr = PetscFree(cols);CHKERRQ(ierr);
1991 
1992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1995     *matout = B;
1996   } else {
1997     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1998   }
1999   PetscFunctionReturn(0);
2000 }
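
/*
   Illustrative sketch (not part of the original source): out-of-place use of MatTranspose(),
   which dispatches to the routine above; the in-place form (passing the matrix itself with
   MAT_REUSE_MATRIX) requires a square matrix, as checked above.  ExampleTranspose is a
   placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleTranspose"
static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr); /* allocates a new matrix with transposed preallocation */
  PetscFunctionReturn(0);
}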
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2005 {
2006   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2007   Mat            a    = aij->A,b = aij->B;
2008   PetscErrorCode ierr;
2009   PetscInt       s1,s2,s3;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2013   if (rr) {
2014     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2015     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2016     /* Overlap communication with computation. */
2017     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018   }
2019   if (ll) {
2020     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2021     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2022     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2023   }
2024   /* scale the diagonal block */
2025   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2026 
2027   if (rr) {
2028     /* Do a scatter end and then right scale the off-diagonal block */
2029     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2030     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
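
/*
   Illustrative sketch (not part of the original source): building conforming left and right
   scaling vectors and calling MatDiagonalScale(), which dispatches to the routine above.
   ExampleDiagonalScale and the scaling values are placeholders.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleDiagonalScale"
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr); /* r conforms to the columns, l to the rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* A <- diag(l) A diag(r) */
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}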
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2038 {
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   PetscErrorCode ierr;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatEqual_MPIAIJ"
2049 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2050 {
2051   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2052   Mat            a,b,c,d;
2053   PetscBool      flg;
2054   PetscErrorCode ierr;
2055 
2056   PetscFunctionBegin;
2057   a = matA->A; b = matA->B;
2058   c = matB->A; d = matB->B;
2059 
2060   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2061   if (flg) {
2062     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2063   }
2064   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatCopy_MPIAIJ"
2070 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2071 {
2072   PetscErrorCode ierr;
2073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2074   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2075 
2076   PetscFunctionBegin;
2077   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2078   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2079     /* because of the column compression in the off-processor part of the matrix a->B,
2080        the number of columns in a->B and b->B may be different, hence we cannot call
2081        the MatCopy() directly on the two parts. If need be, we can provide a more
2082        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2083        then copying the submatrices */
2084     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2085   } else {
2086     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2087     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatSetUp_MPIAIJ"
2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2095 {
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 /*
2104    Computes the number of nonzeros per row needed for preallocation when X and Y
2105    have different nonzero structure.
2106 */
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2110 {
2111   PetscInt       i,j,k,nzx,nzy;
2112 
2113   PetscFunctionBegin;
2114   /* Set the number of nonzeros in the new matrix */
2115   for (i=0; i<m; i++) {
2116     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2117     nzx = xi[i+1] - xi[i];
2118     nzy = yi[i+1] - yi[i];
2119     nnz[i] = 0;
2120     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2121       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2122       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2123       nnz[i]++;
2124     }
2125     for (; k<nzy; k++) nnz[i]++;
2126   }
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       m = Y->rmap->N;
2137   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2138   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatAXPY_MPIAIJ"
2147 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2151   PetscBLASInt   bnz,one=1;
2152   Mat_SeqAIJ     *x,*y;
2153 
2154   PetscFunctionBegin;
2155   if (str == SAME_NONZERO_PATTERN) {
2156     PetscScalar alpha = a;
2157     x    = (Mat_SeqAIJ*)xx->A->data;
2158     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2159     y    = (Mat_SeqAIJ*)yy->A->data;
2160     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2161     x    = (Mat_SeqAIJ*)xx->B->data;
2162     y    = (Mat_SeqAIJ*)yy->B->data;
2163     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2164     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2165     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2166   } else {
2167     Mat      B;
2168     PetscInt *nnz_d,*nnz_o;
2169     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2170     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2171     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2172     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2173     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2174     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2175     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2176     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2177     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2178     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2179     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2180     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2181     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2182     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2183   }
2184   PetscFunctionReturn(0);
2185 }
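
/*
   Illustrative sketch (not part of the original source): Y <- a*X + Y with the two code paths
   above; SAME_NONZERO_PATTERN uses BLAS axpy on the stored values, any other structure flag
   rebuilds Y with merged preallocation.  ExampleAXPY and the scalar are placeholders.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleAXPY"
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}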
2186 
2187 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatConjugate_MPIAIJ"
2191 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2192 {
2193 #if defined(PETSC_USE_COMPLEX)
2194   PetscErrorCode ierr;
2195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2199   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2200 #else
2201   PetscFunctionBegin;
2202 #endif
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatRealPart_MPIAIJ"
2208 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2209 {
2210   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2211   PetscErrorCode ierr;
2212 
2213   PetscFunctionBegin;
2214   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2215   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2221 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2222 {
2223   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2224   PetscErrorCode ierr;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2228   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #if defined(PETSC_HAVE_PBGL)
2233 
2234 #include <boost/parallel/mpi/bsp_process_group.hpp>
2235 #include <boost/graph/distributed/ilu_default_graph.hpp>
2236 #include <boost/graph/distributed/ilu_0_block.hpp>
2237 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2238 #include <boost/graph/distributed/petsc/interface.hpp>
2239 #include <boost/multi_array.hpp>
2240 #include <boost/parallel/distributed_property_map.hpp>
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2244 /*
2245   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2246 */
2247 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2248 {
2249   namespace petsc = boost::distributed::petsc;
2250 
2251   namespace graph_dist = boost::graph::distributed;
2252   using boost::graph::distributed::ilu_default::process_group_type;
2253   using boost::graph::ilu_permuted;
2254 
2255   PetscBool      row_identity, col_identity;
2256   PetscContainer c;
2257   PetscInt       m, n, M, N;
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2262   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2263   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2264   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2265 
2266   process_group_type pg;
2267   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2268   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2269   lgraph_type& level_graph = *lgraph_p;
2270   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2271 
2272   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2273   ilu_permuted(level_graph);
2274 
2275   /* put together the new matrix */
2276   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2277   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2278   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2279   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2280   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2281   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2282   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2283   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2284 
2285   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2286   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2287   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2288   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2294 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2295 {
2296   PetscFunctionBegin;
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatSolve_MPIAIJ"
2302 /*
2303   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2304 */
2305 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2306 {
2307   namespace graph_dist = boost::graph::distributed;
2308 
2309   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2310   lgraph_type    *lgraph_p;
2311   PetscContainer c;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2316   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2317   ierr = VecCopy(b, x);CHKERRQ(ierr);
2318 
2319   PetscScalar *array_x;
2320   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2321   PetscInt sx;
2322   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2323 
2324   PetscScalar *array_b;
2325   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2326   PetscInt sb;
2327   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2328 
2329   lgraph_type& level_graph = *lgraph_p;
2330   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2331 
2332   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2333   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2334   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2335 
2336   typedef boost::iterator_property_map<array_ref_type::iterator,
2337                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2338   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2339   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2340 
2341   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2342   PetscFunctionReturn(0);
2343 }
2344 #endif
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++) {
2373     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   ierr = PetscFree(idxb);CHKERRQ(ierr);
2382   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
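
/*
   Illustrative sketch (not part of the original source): obtaining the largest absolute
   entry of each local row, and optionally its global column index, via the routine above.
   ExampleRowMaxAbs is a placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleRowMaxAbs"
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       m,*idx;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);      /* v conforms to the row layout */
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);      /* idx[i] is the global column of the max in local row i */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}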
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2388 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2389 {
2390   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2391   PetscErrorCode ierr;
2392   PetscInt       i,*idxb = 0;
2393   PetscScalar    *va,*vb;
2394   Vec            vtmp;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2398   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2399   if (idx) {
2400     for (i=0; i<A->rmap->n; i++) { /* v has the row layout, so iterate over local rows */
2401       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2402     }
2403   }
2404 
2405   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2406   if (idx) {
2407     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2408   }
2409   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2410   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2411 
2412   for (i=0; i<A->rmap->n; i++) {
2413     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2414       va[i] = vb[i];
2415       if (idx) idx[i] = a->garray[idxb[i]];
2416     }
2417   }
2418 
2419   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2420   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2421   ierr = PetscFree(idxb);CHKERRQ(ierr);
2422   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2428 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2429 {
2430   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2431   PetscInt       n      = A->rmap->n;
2432   PetscInt       cstart = A->cmap->rstart;
2433   PetscInt       *cmap  = mat->garray;
2434   PetscInt       *diagIdx, *offdiagIdx;
2435   Vec            diagV, offdiagV;
2436   PetscScalar    *a, *diagA, *offdiagA;
2437   PetscInt       r;
2438   PetscErrorCode ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2442   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2443   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2444   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2445   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2447   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2448   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   for (r = 0; r < n; ++r) {
2450     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2451       a[r]   = diagA[r];
2452       idx[r] = cstart + diagIdx[r];
2453     } else {
2454       a[r]   = offdiagA[r];
2455       idx[r] = cmap[offdiagIdx[r]];
2456     }
2457   }
2458   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2459   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2460   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2461   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2462   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2463   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2469 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2470 {
2471   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2472   PetscInt       n      = A->rmap->n;
2473   PetscInt       cstart = A->cmap->rstart;
2474   PetscInt       *cmap  = mat->garray;
2475   PetscInt       *diagIdx, *offdiagIdx;
2476   Vec            diagV, offdiagV;
2477   PetscScalar    *a, *diagA, *offdiagA;
2478   PetscInt       r;
2479   PetscErrorCode ierr;
2480 
2481   PetscFunctionBegin;
2482   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2483   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2484   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2485   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2486   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2488   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2489   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2490   for (r = 0; r < n; ++r) {
2491     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2492       a[r]   = diagA[r];
2493       idx[r] = cstart + diagIdx[r];
2494     } else {
2495       a[r]   = offdiagA[r];
2496       idx[r] = cmap[offdiagIdx[r]];
2497     }
2498   }
2499   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2500   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2501   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2502   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2503   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2504   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2510 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2511 {
2512   PetscErrorCode ierr;
2513   Mat            *dummy;
2514 
2515   PetscFunctionBegin;
2516   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2517   *newmat = *dummy;
2518   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2524 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2525 {
2526   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2527   PetscErrorCode ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 #undef __FUNCT__
2535 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2536 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2537 {
2538   PetscErrorCode ierr;
2539   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2540 
2541   PetscFunctionBegin;
2542   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2543   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2544   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2545   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
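
/*
   Illustrative sketch (not part of the original source): filling the existing nonzero
   structure of a preallocated and assembled MPIAIJ matrix with random values through
   MatSetRandom(), which dispatches to the routine above.  ExampleFillRandom is a
   placeholder name.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleFillRandom"
static PetscErrorCode ExampleFillRandom(Mat A)
{
  PetscErrorCode ierr;
  PetscRandom    rctx;

  PetscFunctionBegin;
  ierr = PetscRandomCreate(PetscObjectComm((PetscObject)A),&rctx);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}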
2548 
2549 /* -------------------------------------------------------------------*/
2550 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2551                                        MatGetRow_MPIAIJ,
2552                                        MatRestoreRow_MPIAIJ,
2553                                        MatMult_MPIAIJ,
2554                                 /* 4*/ MatMultAdd_MPIAIJ,
2555                                        MatMultTranspose_MPIAIJ,
2556                                        MatMultTransposeAdd_MPIAIJ,
2557 #if defined(PETSC_HAVE_PBGL)
2558                                        MatSolve_MPIAIJ,
2559 #else
2560                                        0,
2561 #endif
2562                                        0,
2563                                        0,
2564                                 /*10*/ 0,
2565                                        0,
2566                                        0,
2567                                        MatSOR_MPIAIJ,
2568                                        MatTranspose_MPIAIJ,
2569                                 /*15*/ MatGetInfo_MPIAIJ,
2570                                        MatEqual_MPIAIJ,
2571                                        MatGetDiagonal_MPIAIJ,
2572                                        MatDiagonalScale_MPIAIJ,
2573                                        MatNorm_MPIAIJ,
2574                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2575                                        MatAssemblyEnd_MPIAIJ,
2576                                        MatSetOption_MPIAIJ,
2577                                        MatZeroEntries_MPIAIJ,
2578                                 /*24*/ MatZeroRows_MPIAIJ,
2579                                        0,
2580 #if defined(PETSC_HAVE_PBGL)
2581                                        0,
2582 #else
2583                                        0,
2584 #endif
2585                                        0,
2586                                        0,
2587                                 /*29*/ MatSetUp_MPIAIJ,
2588 #if defined(PETSC_HAVE_PBGL)
2589                                        0,
2590 #else
2591                                        0,
2592 #endif
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*34*/ MatDuplicate_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*39*/ MatAXPY_MPIAIJ,
2602                                        MatGetSubMatrices_MPIAIJ,
2603                                        MatIncreaseOverlap_MPIAIJ,
2604                                        MatGetValues_MPIAIJ,
2605                                        MatCopy_MPIAIJ,
2606                                 /*44*/ MatGetRowMax_MPIAIJ,
2607                                        MatScale_MPIAIJ,
2608                                        0,
2609                                        MatDiagonalSet_MPIAIJ,
2610                                        MatZeroRowsColumns_MPIAIJ,
2611                                 /*49*/ MatSetRandom_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2617                                        0,
2618                                        MatSetUnfactored_MPIAIJ,
2619                                        MatPermute_MPIAIJ,
2620                                        0,
2621                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2622                                        MatDestroy_MPIAIJ,
2623                                        MatView_MPIAIJ,
2624                                        0,
2625                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2627                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2632                                        MatGetRowMinAbs_MPIAIJ,
2633                                        0,
2634                                        MatSetColoring_MPIAIJ,
2635                                        0,
2636                                        MatSetValuesAdifor_MPIAIJ,
2637                                 /*75*/ MatFDColoringApply_AIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        MatFindZeroDiagonals_MPIAIJ,
2642                                 /*80*/ 0,
2643                                        0,
2644                                        0,
2645                                 /*83*/ MatLoad_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2652                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2653                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2654                                        MatPtAP_MPIAIJ_MPIAIJ,
2655                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2656                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*99*/ 0,
2662                                        0,
2663                                        0,
2664                                        MatConjugate_MPIAIJ,
2665                                        0,
2666                                 /*104*/MatSetValuesRow_MPIAIJ,
2667                                        MatRealPart_MPIAIJ,
2668                                        MatImaginaryPart_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                 /*109*/0,
2672                                        0,
2673                                        MatGetRowMin_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*119*/0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        MatGetMultiProcBlock_MPIAIJ,
2686                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2687                                        MatGetColumnNorms_MPIAIJ,
2688                                        MatInvertBlockDiagonal_MPIAIJ,
2689                                        0,
2690                                        MatGetSubMatricesParallel_MPIAIJ,
2691                                 /*129*/0,
2692                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2693                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2694                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2695                                        0,
2696                                 /*134*/0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                 /*139*/0,
2702                                        0,
2703                                        0,
2704                                        MatFDColoringSetUp_MPIXAIJ,
2705                                        0,
2706                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2707 };
2708 
2709 /* ----------------------------------------------------------------------------------------*/
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2713 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2714 {
2715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2720   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2726 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2739 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2740 {
2741   Mat_MPIAIJ     *b;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2746   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2747   b = (Mat_MPIAIJ*)B->data;
2748 
2749   if (!B->preallocated) {
2750     /* Explicitly create 2 MATSEQAIJ matrices. */
2751     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2752     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2753     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2754     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2755     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2756     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2757     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2758     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2759     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2760     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2761   }
2762 
2763   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2764   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2765   B->preallocated = PETSC_TRUE;
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2771 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2772 {
2773   Mat            mat;
2774   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2775   PetscErrorCode ierr;
2776 
2777   PetscFunctionBegin;
2778   *newmat = 0;
2779   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2780   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2781   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2782   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2783   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2784   a       = (Mat_MPIAIJ*)mat->data;
2785 
2786   mat->factortype   = matin->factortype;
2787   mat->assembled    = PETSC_TRUE;
2788   mat->insertmode   = NOT_SET_VALUES;
2789   mat->preallocated = PETSC_TRUE;
2790 
2791   a->size         = oldmat->size;
2792   a->rank         = oldmat->rank;
2793   a->donotstash   = oldmat->donotstash;
2794   a->roworiented  = oldmat->roworiented;
2795   a->rowindices   = 0;
2796   a->rowvalues    = 0;
2797   a->getrowactive = PETSC_FALSE;
2798 
2799   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2801 
2802   if (oldmat->colmap) {
2803 #if defined(PETSC_USE_CTABLE)
2804     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2805 #else
2806     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2807     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2808     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2809 #endif
2810   } else a->colmap = 0;
2811   if (oldmat->garray) {
2812     PetscInt len;
2813     len  = oldmat->B->cmap->n;
2814     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2815     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2816     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2817   } else a->garray = 0;
2818 
2819   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2820   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2821   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2822   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2823   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2824   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2825   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2826   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2827   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2828   *newmat = mat;
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatLoad_MPIAIJ"
2836 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2837 {
2838   PetscScalar    *vals,*svals;
2839   MPI_Comm       comm;
2840   PetscErrorCode ierr;
2841   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2842   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2843   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2844   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2845   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2846   int            fd;
2847   PetscInt       bs = 1;
2848 
2849   PetscFunctionBegin;
2850   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2851   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2852   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2853   if (!rank) {
2854     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2855     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2856     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2857   }
2858 
2859   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2860   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2861   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2862 
2863   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2864 
2865   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2866   M    = header[1]; N = header[2];
2867   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2868   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2869   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2870 
2871   /* If global sizes are set, check if they are consistent with that given in the file */
2872   if (sizesset) {
2873     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2874   }
2875   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2876   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2877 
2878   /* determine ownership of all (block) rows */
2879   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2880   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2881   else m = newMat->rmap->n; /* Set by user */
2882 
2883   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
2884   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2885 
2886   /* First process needs enough room for process with most rows */
2887   if (!rank) {
2888     mmax = rowners[1];
2889     for (i=2; i<=size; i++) {
2890       mmax = PetscMax(mmax, rowners[i]);
2891     }
2892   } else mmax = -1;             /* unused, but compilers complain */
2893 
2894   rowners[0] = 0;
2895   for (i=2; i<=size; i++) {
2896     rowners[i] += rowners[i-1];
2897   }
2898   rstart = rowners[rank];
2899   rend   = rowners[rank+1];
2900 
2901   /* distribute row lengths to all processors */
2902   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2903   if (!rank) {
2904     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2905     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2906     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2907     for (j=0; j<m; j++) {
2908       procsnz[0] += ourlens[j];
2909     }
2910     for (i=1; i<size; i++) {
2911       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2912       /* calculate the number of nonzeros on each processor */
2913       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2914         procsnz[i] += rowlengths[j];
2915       }
2916       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2917     }
2918     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2919   } else {
2920     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2921   }
2922 
2923   if (!rank) {
2924     /* determine max buffer needed and allocate it */
2925     maxnz = 0;
2926     for (i=0; i<size; i++) {
2927       maxnz = PetscMax(maxnz,procsnz[i]);
2928     }
2929     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2930 
2931     /* read in my part of the matrix column indices  */
2932     nz   = procsnz[0];
2933     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2934     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2935 
2936     /* read in everyone else's rows and ship them off */
2937     for (i=1; i<size; i++) {
2938       nz   = procsnz[i];
2939       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2940       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2941     }
2942     ierr = PetscFree(cols);CHKERRQ(ierr);
2943   } else {
2944     /* determine buffer space needed for message */
2945     nz = 0;
2946     for (i=0; i<m; i++) {
2947       nz += ourlens[i];
2948     }
2949     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2950 
2951     /* receive message of column indices */
2952     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2953   }
2954 
2955   /* determine column ownership if matrix is not square */
2956   if (N != M) {
2957     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2958     else n = newMat->cmap->n;
2959     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2960     cstart = cend - n;
2961   } else {
2962     cstart = rstart;
2963     cend   = rend;
2964     n      = cend - cstart;
2965   }
2966 
2967   /* loop over local rows, determining number of off diagonal entries */
2968   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2969   jj   = 0;
2970   for (i=0; i<m; i++) {
2971     for (j=0; j<ourlens[i]; j++) {
2972       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2973       jj++;
2974     }
2975   }
2976 
2977   for (i=0; i<m; i++) {
2978     ourlens[i] -= offlens[i];
2979   }
2980   if (!sizesset) {
2981     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2982   }
2983 
2984   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2985 
2986   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2987 
2988   for (i=0; i<m; i++) {
2989     ourlens[i] += offlens[i];
2990   }
2991 
2992   if (!rank) {
2993     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
2994 
2995     /* read in my part of the matrix numerical values  */
2996     nz   = procsnz[0];
2997     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2998 
2999     /* insert into matrix */
3000     jj      = rstart;
3001     smycols = mycols;
3002     svals   = vals;
3003     for (i=0; i<m; i++) {
3004       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3005       smycols += ourlens[i];
3006       svals   += ourlens[i];
3007       jj++;
3008     }
3009 
3010     /* read in other processors and ship out */
3011     for (i=1; i<size; i++) {
3012       nz   = procsnz[i];
3013       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3014       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3015     }
3016     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3017   } else {
3018     /* receive numeric values */
3019     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3020 
3021     /* receive message of values */
3022     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3023 
3024     /* insert into matrix */
3025     jj      = rstart;
3026     smycols = mycols;
3027     svals   = vals;
3028     for (i=0; i<m; i++) {
3029       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3030       smycols += ourlens[i];
3031       svals   += ourlens[i];
3032       jj++;
3033     }
3034   }
3035   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3036   ierr = PetscFree(vals);CHKERRQ(ierr);
3037   ierr = PetscFree(mycols);CHKERRQ(ierr);
3038   ierr = PetscFree(rowners);CHKERRQ(ierr);
3039   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3046 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3047 {
3048   PetscErrorCode ierr;
3049   IS             iscol_local;
3050   PetscInt       csize;
3051 
3052   PetscFunctionBegin;
3053   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3054   if (call == MAT_REUSE_MATRIX) {
3055     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3056     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3057   } else {
3058     PetscInt cbs;
3059     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3060     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3061     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3062   }
3063   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3064   if (call == MAT_INITIAL_MATRIX) {
3065     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3066     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3067   }
3068   PetscFunctionReturn(0);
3069 }
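
/*
   Illustrative usage sketch (not part of this file's build) of the public MatGetSubMatrix()
   interface, which dispatches to the routine above for MATMPIAIJ. It assumes a square,
   assembled MATMPIAIJ matrix A, that PETSc is initialized, and that ierr is a PetscErrorCode
   in scope. Each process requests its locally owned rows and the matching block of columns;
   after the values of A change, the same call with MAT_REUSE_MATRIX refreshes Asub.

     IS       isrow,iscol;
     Mat      Asub;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&Asub);CHKERRQ(ierr);
*/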
3070 
3071 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3074 /*
3075     Not great since it makes two copies of the submatrix: first a SeqAIJ holding each
3076   process's local piece, and then the final result obtained by concatenating the local matrices.
3077   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3078 
3079   Note: This requires a sequential iscol containing all of the requested columns.
3080 */
3081 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3082 {
3083   PetscErrorCode ierr;
3084   PetscMPIInt    rank,size;
3085   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3086   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3087   PetscBool      allcolumns, colflag;
3088   Mat            M,Mreuse;
3089   MatScalar      *vwork,*aa;
3090   MPI_Comm       comm;
3091   Mat_SeqAIJ     *aij;
3092 
3093   PetscFunctionBegin;
3094   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3095   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3096   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3097 
3098   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3099   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3100   if (colflag && ncol == mat->cmap->N) {
3101     allcolumns = PETSC_TRUE;
3102   } else {
3103     allcolumns = PETSC_FALSE;
3104   }
3105   if (call ==  MAT_REUSE_MATRIX) {
3106     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3107     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3108     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3109   } else {
3110     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3111   }
3112 
3113   /*
3114       m - number of local rows
3115       n - number of columns (same on all processors)
3116       rstart - first row in new global matrix generated
3117   */
3118   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3119   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3120   if (call == MAT_INITIAL_MATRIX) {
3121     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3122     ii  = aij->i;
3123     jj  = aij->j;
3124 
3125     /*
3126         Determine the number of non-zeros in the diagonal and off-diagonal
3127         portions of the matrix in order to do correct preallocation
3128     */
3129 
3130     /* first get start and end of "diagonal" columns */
3131     if (csize == PETSC_DECIDE) {
3132       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3133       if (mglobal == n) { /* square matrix */
3134         nlocal = m;
3135       } else {
3136         nlocal = n/size + ((n % size) > rank);
3137       }
3138     } else {
3139       nlocal = csize;
3140     }
3141     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3142     rstart = rend - nlocal;
3143     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3144 
3145     /* next, compute all the lengths */
3146     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3147     olens = dlens + m;
3148     for (i=0; i<m; i++) {
3149       jend = ii[i+1] - ii[i];
3150       olen = 0;
3151       dlen = 0;
3152       for (j=0; j<jend; j++) {
3153         if (*jj < rstart || *jj >= rend) olen++;
3154         else dlen++;
3155         jj++;
3156       }
3157       olens[i] = olen;
3158       dlens[i] = dlen;
3159     }
3160     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3161     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3162     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3163     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3164     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3165     ierr = PetscFree(dlens);CHKERRQ(ierr);
3166   } else {
3167     PetscInt ml,nl;
3168 
3169     M    = *newmat;
3170     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3171     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3172     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3173     /*
3174          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3175        rather than the slower MatSetValues().
3176     */
3177     M->was_assembled = PETSC_TRUE;
3178     M->assembled     = PETSC_FALSE;
3179   }
3180   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3181   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3182   ii   = aij->i;
3183   jj   = aij->j;
3184   aa   = aij->a;
3185   for (i=0; i<m; i++) {
3186     row   = rstart + i;
3187     nz    = ii[i+1] - ii[i];
3188     cwork = jj;     jj += nz;
3189     vwork = aa;     aa += nz;
3190     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3191   }
3192 
3193   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195   *newmat = M;
3196 
3197   /* save submatrix used in processor for next request */
3198   if (call ==  MAT_INITIAL_MATRIX) {
3199     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3200     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3201   }
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3207 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3208 {
3209   PetscInt       m,cstart, cend,j,nnz,i,d;
3210   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3211   const PetscInt *JJ;
3212   PetscScalar    *values;
3213   PetscErrorCode ierr;
3214 
3215   PetscFunctionBegin;
3216   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3217 
3218   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3219   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3220   m      = B->rmap->n;
3221   cstart = B->cmap->rstart;
3222   cend   = B->cmap->rend;
3223   rstart = B->rmap->rstart;
3224 
3225   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3226 
3227 #if defined(PETSC_USE_DEBUG)
3228   for (i=0; i<m; i++) {
3229     nnz = Ii[i+1]- Ii[i];
3230     JJ  = J + Ii[i];
3231     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3232     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3233     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3234   }
3235 #endif
3236 
3237   for (i=0; i<m; i++) {
3238     nnz     = Ii[i+1]- Ii[i];
3239     JJ      = J + Ii[i];
3240     nnz_max = PetscMax(nnz_max,nnz);
3241     d       = 0;
3242     for (j=0; j<nnz; j++) {
3243       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3244     }
3245     d_nnz[i] = d;
3246     o_nnz[i] = nnz - d;
3247   }
3248   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3249   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3250 
3251   if (v) values = (PetscScalar*)v;
3252   else {
3253     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3254   }
3255 
3256   for (i=0; i<m; i++) {
3257     ii   = i + rstart;
3258     nnz  = Ii[i+1]- Ii[i];
3259     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3260   }
3261   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3262   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263 
3264   if (!v) {
3265     ierr = PetscFree(values);CHKERRQ(ierr);
3266   }
3267   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3273 /*@
3274    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3275    (the default parallel PETSc format).
3276 
3277    Collective on MPI_Comm
3278 
3279    Input Parameters:
3280 +  B - the matrix
3281 .  i - the indices into j for the start of each local row (starts with zero)
3282 .  j - the column indices for each local row (starts with zero)
3283 -  v - optional values in the matrix
3284 
3285    Level: developer
3286 
3287    Notes:
3288        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3289      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3290      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3291 
3292        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3293 
3294        The format which is used for the sparse matrix input is equivalent to a
3295     row-major ordering, i.e. for the following matrix, the input data expected is
3296     as shown:
3297 
3298         1 0 0
3299         2 0 3     P0
3300        -------
3301         4 5 6     P1
3302 
3303      Process0 [P0]: rows_owned=[0,1]
3304         i =  {0,1,3}  [size = nrow+1  = 2+1]
3305         j =  {0,0,2}  [size = nz = 3]
3306         v =  {1,2,3}  [size = nz = 3]
3307 
3308      Process1 [P1]: rows_owned=[2]
3309         i =  {0,3}    [size = nrow+1  = 1+1]
3310         j =  {0,1,2}  [size = nz = 3]
3311         v =  {4,5,6}  [size = nz = 3]
3312 
3313 .keywords: matrix, aij, compressed row, sparse, parallel
3314 
3315 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3316           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3317 @*/
3318 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3319 {
3320   PetscErrorCode ierr;
3321 
3322   PetscFunctionBegin;
3323   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3324   PetscFunctionReturn(0);
3325 }
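
/*
   Illustrative sketch, assuming exactly two MPI processes, of feeding the 3x3 example from the
   manual page above to MatMPIAIJSetPreallocationCSR(); the local CSR triples are exactly the
   P0/P1 data listed there. It assumes PETSc is initialized and ierr is a PetscErrorCode in
   scope. The routine also inserts the values and assembles the matrix, so B is usable afterwards.

     Mat               B;
     PetscMPIInt       rank;
     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     const PetscScalar v0[] = {1,2,3};
     const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     const PetscScalar v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/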
3326 
3327 #undef __FUNCT__
3328 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3329 /*@C
3330    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3331    (the default parallel PETSc format).  For good matrix assembly performance
3332    the user should preallocate the matrix storage by setting the parameters
3333    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3334    performance can be increased by more than a factor of 50.
3335 
3336    Collective on MPI_Comm
3337 
3338    Input Parameters:
3339 +  B - the matrix
3340 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3341            (same value is used for all local rows)
3342 .  d_nnz - array containing the number of nonzeros in the various rows of the
3343            DIAGONAL portion of the local submatrix (possibly different for each row)
3344            or NULL, if d_nz is used to specify the nonzero structure.
3345            The size of this array is equal to the number of local rows, i.e 'm'.
3346            For matrices that will be factored, you must leave room for (and set)
3347            the diagonal entry even if it is zero.
3348 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3349            submatrix (same value is used for all local rows).
3350 -  o_nnz - array containing the number of nonzeros in the various rows of the
3351            OFF-DIAGONAL portion of the local submatrix (possibly different for
3352            each row) or NULL, if o_nz is used to specify the nonzero
3353            structure. The size of this array is equal to the number
3354            of local rows, i.e 'm'.
3355 
3356    If the *_nnz parameter is given then the *_nz parameter is ignored
3357 
3358    The AIJ format (also called the Yale sparse matrix format or
3359    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3360    storage.  The stored row and column indices begin with zero.
3361    See Users-Manual: ch_mat for details.
3362 
3363    The parallel matrix is partitioned such that the first m0 rows belong to
3364    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3365    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3366 
3367    The DIAGONAL portion of the local submatrix of a processor can be defined
3368    as the submatrix which is obtained by extracting the part corresponding to
3369    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3370    first row that belongs to the processor, r2 is the last row belonging to
3371    this processor, and c1-c2 is the range of indices of the local part of a
3372    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3373    common case of a square matrix, the row and column ranges are the same and
3374    the DIAGONAL part is also square. The remaining portion of the local
3375    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3376 
3377    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3378 
3379    You can call MatGetInfo() to get information on how effective the preallocation was;
3380    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3381    You can also run with the option -info and look for messages with the string
3382    malloc in them to see if additional memory allocation was needed.
3383 
3384    Example usage:
3385 
3386    Consider the following 8x8 matrix with 34 non-zero values, that is
3387    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3388    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3389    as follows:
3390 
3391 .vb
3392             1  2  0  |  0  3  0  |  0  4
3393     Proc0   0  5  6  |  7  0  0  |  8  0
3394             9  0 10  | 11  0  0  | 12  0
3395     -------------------------------------
3396            13  0 14  | 15 16 17  |  0  0
3397     Proc1   0 18  0  | 19 20 21  |  0  0
3398             0  0  0  | 22 23  0  | 24  0
3399     -------------------------------------
3400     Proc2  25 26 27  |  0  0 28  | 29  0
3401            30  0  0  | 31 32 33  |  0 34
3402 .ve
3403 
3404    This can be represented as a collection of submatrices as:
3405 
3406 .vb
3407       A B C
3408       D E F
3409       G H I
3410 .ve
3411 
3412    Where the submatrices A,B,C are owned by proc0, D,E,F are
3413    owned by proc1, G,H,I are owned by proc2.
3414 
3415    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3416    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3417    The 'M','N' parameters are 8,8, and have the same values on all procs.
3418 
3419    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3420    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3421    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3422    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3423    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3424    matrix, and [DF] as another SeqAIJ matrix.
3425 
3426    When d_nz, o_nz parameters are specified, d_nz storage elements are
3427    allocated for every row of the local diagonal submatrix, and o_nz
3428    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3429    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3430    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3431    In this case, the values of d_nz,o_nz are:
3432 .vb
3433      proc0 : dnz = 2, o_nz = 2
3434      proc1 : dnz = 3, o_nz = 2
3435      proc2 : dnz = 1, o_nz = 4
3436 .ve
3437    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3438    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3439    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3440    34 values.
3441 
3442    When d_nnz, o_nnz parameters are specified, the storage is specified
3443    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3444    In the above case the values for d_nnz,o_nnz are:
3445 .vb
3446      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3447      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3448      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3449 .ve
3450    Here the space allocated is sum of all the above values i.e 34, and
3451    hence pre-allocation is perfect.
3452 
3453    Level: intermediate
3454 
3455 .keywords: matrix, aij, compressed row, sparse, parallel
3456 
3457 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3458           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3459 @*/
3460 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3461 {
3462   PetscErrorCode ierr;
3463 
3464   PetscFunctionBegin;
3465   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3466   PetscValidType(B,1);
3467   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3468   PetscFunctionReturn(0);
3469 }
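
/*
   Illustrative sketch, assuming exactly three MPI processes, of preallocating the 8x8 example
   from the manual page above with the per-row d_nnz/o_nnz values listed there; entries would
   then be inserted with MatSetValues() and the matrix assembled as usual. It assumes PETSc is
   initialized and ierr is a PetscErrorCode in scope.

     Mat            A;
     PetscMPIInt    rank;
     const PetscInt m[3]     = {3,3,2};
     const PetscInt d0[3]    = {2,2,2}, o0[3] = {2,2,2};
     const PetscInt d1[3]    = {3,3,2}, o1[3] = {2,1,1};
     const PetscInt d2[2]    = {1,1},   o2[2] = {4,4};
     const PetscInt *dnnz[3] = {d0,d1,d2};
     const PetscInt *onnz[3] = {o0,o1,o2};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m[rank],m[rank],8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,dnnz[rank],0,onnz[rank]);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/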
3470 
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3473 /*@
3474      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3475          CSR format the local rows.
3476 
3477    Collective on MPI_Comm
3478 
3479    Input Parameters:
3480 +  comm - MPI communicator
3481 .  m - number of local rows (Cannot be PETSC_DECIDE)
3482 .  n - This value should be the same as the local size used in creating the
3483        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3484        calculated if N is given) For square matrices n is almost always m.
3485 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3486 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3487 .   i - row indices
3488 .   j - column indices
3489 -   a - matrix values
3490 
3491    Output Parameter:
3492 .   mat - the matrix
3493 
3494    Level: intermediate
3495 
3496    Notes:
3497        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3498      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3499      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3500 
3501        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3502 
3503        The format which is used for the sparse matrix input is equivalent to a
3504     row-major ordering, i.e. for the following matrix, the input data expected is
3505     as shown:
3506 
3507         1 0 0
3508         2 0 3     P0
3509        -------
3510         4 5 6     P1
3511 
3512      Process0 [P0]: rows_owned=[0,1]
3513         i =  {0,1,3}  [size = nrow+1  = 2+1]
3514         j =  {0,0,2}  [size = nz = 3]
3515         v =  {1,2,3}  [size = nz = 3]
3516 
3517      Process1 [P1]: rows_owned=[2]
3518         i =  {0,3}    [size = nrow+1  = 1+1]
3519         j =  {0,1,2}  [size = nz = 3]
3520         v =  {4,5,6}  [size = nz = 3]
3521 
3522 .keywords: matrix, aij, compressed row, sparse, parallel
3523 
3524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3525           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3526 @*/
3527 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3528 {
3529   PetscErrorCode ierr;
3530 
3531   PetscFunctionBegin;
3532   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3533   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3534   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3535   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3536   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3537   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3538   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
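
/*
   Illustrative sketch, assuming exactly two MPI processes, of creating the 3x3 example from the
   manual page above in a single call; the i/j/a arrays are the local CSR data for each process.
   It assumes PETSc is initialized and ierr is a PetscErrorCode in scope.

     Mat               A;
     PetscMPIInt       rank;
     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     const PetscScalar a0[] = {1,2,3};
     const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     const PetscScalar a1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr);}
     else       {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&A);CHKERRQ(ierr);}
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/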
3541 
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatCreateAIJ"
3544 /*@C
3545    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3546    (the default parallel PETSc format).  For good matrix assembly performance
3547    the user should preallocate the matrix storage by setting the parameters
3548    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3549    performance can be increased by more than a factor of 50.
3550 
3551    Collective on MPI_Comm
3552 
3553    Input Parameters:
3554 +  comm - MPI communicator
3555 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3556            This value should be the same as the local size used in creating the
3557            y vector for the matrix-vector product y = Ax.
3558 .  n - This value should be the same as the local size used in creating the
3559        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3560        calculated if N is given) For square matrices n is almost always m.
3561 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3562 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3563 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3564            (same value is used for all local rows)
3565 .  d_nnz - array containing the number of nonzeros in the various rows of the
3566            DIAGONAL portion of the local submatrix (possibly different for each row)
3567            or NULL, if d_nz is used to specify the nonzero structure.
3568            The size of this array is equal to the number of local rows, i.e 'm'.
3569 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3570            submatrix (same value is used for all local rows).
3571 -  o_nnz - array containing the number of nonzeros in the various rows of the
3572            OFF-DIAGONAL portion of the local submatrix (possibly different for
3573            each row) or NULL, if o_nz is used to specify the nonzero
3574            structure. The size of this array is equal to the number
3575            of local rows, i.e 'm'.
3576 
3577    Output Parameter:
3578 .  A - the matrix
3579 
3580    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3581    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3582    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3583 
3584    Notes:
3585    If the *_nnz parameter is given then the *_nz parameter is ignored
3586 
3587    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3588    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3589    storage requirements for this matrix.
3590 
3591    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3592    processor then it must be used on all processors that share the object for
3593    that argument.
3594 
3595    The user MUST specify either the local or global matrix dimensions
3596    (possibly both).
3597 
3598    The parallel matrix is partitioned across processors such that the
3599    first m0 rows belong to process 0, the next m1 rows belong to
3600    process 1, the next m2 rows belong to process 2, etc., where
3601    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
3602    values corresponding to an [m x N] submatrix.
3603 
3604    The columns are logically partitioned with the n0 columns belonging
3605    to 0th partition, the next n1 columns belonging to the next
3606    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3607 
3608    The DIAGONAL portion of the local submatrix on any given processor
3609    is the submatrix corresponding to the rows and columns m,n
3610    owned by the given processor, i.e. the diagonal matrix on
3611    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3612    etc. The remaining portion of the local submatrix [m x (N-n)]
3613    constitutes the OFF-DIAGONAL portion. The example below better
3614    illustrates this concept.
3615 
3616    For a square global matrix we define each processor's diagonal portion
3617    to be its local rows and the corresponding columns (a square submatrix);
3618    each processor's off-diagonal portion encompasses the remainder of the
3619    local matrix (a rectangular submatrix).
3620 
3621    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3622 
3623    When calling this routine with a single process communicator, a matrix of
3624    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3625    type of communicator, use the construction mechanism:
3626      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3627 
3628    By default, this format uses inodes (identical nodes) when possible.
3629    We search for consecutive rows with the same nonzero structure, thereby
3630    reusing matrix information to achieve increased efficiency.
3631 
3632    Options Database Keys:
3633 +  -mat_no_inode  - Do not use inodes
3634 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3635 -  -mat_aij_oneindex - Internally use indexing starting at 1
3636         rather than 0.  Note that when calling MatSetValues(),
3637         the user still MUST index entries starting at 0!
3638 
3639 
3640    Example usage:
3641 
3642    Consider the following 8x8 matrix with 34 non-zero values, that is
3643    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3644    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3645    as follows:
3646 
3647 .vb
3648             1  2  0  |  0  3  0  |  0  4
3649     Proc0   0  5  6  |  7  0  0  |  8  0
3650             9  0 10  | 11  0  0  | 12  0
3651     -------------------------------------
3652            13  0 14  | 15 16 17  |  0  0
3653     Proc1   0 18  0  | 19 20 21  |  0  0
3654             0  0  0  | 22 23  0  | 24  0
3655     -------------------------------------
3656     Proc2  25 26 27  |  0  0 28  | 29  0
3657            30  0  0  | 31 32 33  |  0 34
3658 .ve
3659 
3660    This can be represented as a collection of submatrices as:
3661 
3662 .vb
3663       A B C
3664       D E F
3665       G H I
3666 .ve
3667 
3668    Where the submatrices A,B,C are owned by proc0, D,E,F are
3669    owned by proc1, G,H,I are owned by proc2.
3670 
3671    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3672    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3673    The 'M','N' parameters are 8,8, and have the same values on all procs.
3674 
3675    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3676    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3677    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3678    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3679    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3680    matrix, and [DF] as another SeqAIJ matrix.
3681 
3682    When d_nz, o_nz parameters are specified, d_nz storage elements are
3683    allocated for every row of the local diagonal submatrix, and o_nz
3684    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3685    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3686    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3687    In this case, the values of d_nz,o_nz are:
3688 .vb
3689      proc0 : dnz = 2, o_nz = 2
3690      proc1 : dnz = 3, o_nz = 2
3691      proc2 : dnz = 1, o_nz = 4
3692 .ve
3693    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3694    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3695    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3696    34 values.
3697 
3698    When d_nnz, o_nnz parameters are specified, the storage is specified
3699    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3700    In the above case the values for d_nnz,o_nnz are:
3701 .vb
3702      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3703      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3704      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3705 .ve
3706    Here the space allocated is sum of all the above values i.e 34, and
3707    hence pre-allocation is perfect.
3708 
3709    Level: intermediate
3710 
3711 .keywords: matrix, aij, compressed row, sparse, parallel
3712 
3713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3714           MPIAIJ, MatCreateMPIAIJWithArrays()
3715 @*/
3716 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3717 {
3718   PetscErrorCode ierr;
3719   PetscMPIInt    size;
3720 
3721   PetscFunctionBegin;
3722   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3723   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3725   if (size > 1) {
3726     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3727     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3728   } else {
3729     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3730     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3731   }
3732   PetscFunctionReturn(0);
3733 }
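
/*
   Illustrative sketch of the MatCreate()/MatSetFromOptions()/MatXXXXSetPreallocation() paradigm
   recommended in the manual page above; calling both preallocation routines keeps the code
   correct for one-process and multi-process communicators alike, since only the routine matching
   the actual matrix type has any effect. The local size nlocal and the nonzero estimates (5 and 2)
   are assumptions for the sketch; it also assumes PETSc is initialized and ierr is a
   PetscErrorCode in scope.

     Mat      A;
     PetscInt nlocal = 10;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/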
3734 
3735 #undef __FUNCT__
3736 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3737 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3738 {
3739   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3740 
3741   PetscFunctionBegin;
3742   if (Ad)     *Ad     = a->A;
3743   if (Ao)     *Ao     = a->B;
3744   if (colmap) *colmap = a->garray;
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #undef __FUNCT__
3749 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3750 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3751 {
3752   PetscErrorCode ierr;
3753   PetscInt       i;
3754   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3755 
3756   PetscFunctionBegin;
3757   if (coloring->ctype == IS_COLORING_GLOBAL) {
3758     ISColoringValue *allcolors,*colors;
3759     ISColoring      ocoloring;
3760 
3761     /* set coloring for diagonal portion */
3762     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3763 
3764     /* set coloring for off-diagonal portion */
3765     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3766     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3767     for (i=0; i<a->B->cmap->n; i++) {
3768       colors[i] = allcolors[a->garray[i]];
3769     }
3770     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3771     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3772     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3773     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3774   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3775     ISColoringValue *colors;
3776     PetscInt        *larray;
3777     ISColoring      ocoloring;
3778 
3779     /* set coloring for diagonal portion */
3780     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3781     for (i=0; i<a->A->cmap->n; i++) {
3782       larray[i] = i + A->cmap->rstart;
3783     }
3784     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3785     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3786     for (i=0; i<a->A->cmap->n; i++) {
3787       colors[i] = coloring->colors[larray[i]];
3788     }
3789     ierr = PetscFree(larray);CHKERRQ(ierr);
3790     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3791     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3792     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3793 
3794     /* set coloring for off-diagonal portion */
3795     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3796     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3797     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3798     for (i=0; i<a->B->cmap->n; i++) {
3799       colors[i] = coloring->colors[larray[i]];
3800     }
3801     ierr = PetscFree(larray);CHKERRQ(ierr);
3802     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3803     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3804     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3805   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3806   PetscFunctionReturn(0);
3807 }
3808 
3809 #undef __FUNCT__
3810 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3811 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3812 {
3813   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3814   PetscErrorCode ierr;
3815 
3816   PetscFunctionBegin;
3817   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3818   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3819   PetscFunctionReturn(0);
3820 }
3821 
3822 #undef __FUNCT__
3823 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3824 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3825 {
3826   PetscErrorCode ierr;
3827   PetscInt       m,N,i,rstart,nnz,Ii;
3828   PetscInt       *indx;
3829   PetscScalar    *values;
3830 
3831   PetscFunctionBegin;
3832   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3833   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3834     PetscInt       *dnz,*onz,sum,bs,cbs;
3835 
3836     if (n == PETSC_DECIDE) {
3837       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3838     }
3839     /* Check sum(n) = N */
3840     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3841     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3842 
3843     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3844     rstart -= m;
3845 
3846     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3847     for (i=0; i<m; i++) {
3848       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3849       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3850       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3851     }
3852 
3853     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3854     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3855     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3856     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3857     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3858     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3859     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3860   }
3861 
3862   /* numeric phase */
3863   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3864   for (i=0; i<m; i++) {
3865     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3866     Ii   = i + rstart;
3867     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3868     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3869   }
3870   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 #undef __FUNCT__
3876 #define __FUNCT__ "MatFileSplit"
3877 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3878 {
3879   PetscErrorCode    ierr;
3880   PetscMPIInt       rank;
3881   PetscInt          m,N,i,rstart,nnz;
3882   size_t            len;
3883   const PetscInt    *indx;
3884   PetscViewer       out;
3885   char              *name;
3886   Mat               B;
3887   const PetscScalar *values;
3888 
3889   PetscFunctionBegin;
3890   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3891   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3892   /* Should this be the type of the diagonal block of A? */
3893   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3894   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3895   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3896   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3897   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3898   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3899   for (i=0; i<m; i++) {
3900     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3901     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3902     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3903   }
3904   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3906 
3907   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3908   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3909   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
3910   sprintf(name,"%s.%d",outfile,rank);
3911   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3912   ierr = PetscFree(name);CHKERRQ(ierr);
3913   ierr = MatView(B,out);CHKERRQ(ierr);
3914   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3915   ierr = MatDestroy(&B);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3922 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3923 {
3924   PetscErrorCode      ierr;
3925   Mat_Merge_SeqsToMPI *merge;
3926   PetscContainer      container;
3927 
3928   PetscFunctionBegin;
3929   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3930   if (container) {
3931     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3932     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3933     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3934     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3935     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3936     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3937     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3938     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3944     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3945     ierr = PetscFree(merge);CHKERRQ(ierr);
3946     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3947   }
3948   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3949   PetscFunctionReturn(0);
3950 }
3951 
3952 #include <../src/mat/utils/freespace.h>
3953 #include <petscbt.h>
3954 
3955 #undef __FUNCT__
3956 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3957 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3958 {
3959   PetscErrorCode      ierr;
3960   MPI_Comm            comm;
3961   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3962   PetscMPIInt         size,rank,taga,*len_s;
3963   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3964   PetscInt            proc,m;
3965   PetscInt            **buf_ri,**buf_rj;
3966   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3967   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3968   MPI_Request         *s_waits,*r_waits;
3969   MPI_Status          *status;
3970   MatScalar           *aa=a->a;
3971   MatScalar           **abuf_r,*ba_i;
3972   Mat_Merge_SeqsToMPI *merge;
3973   PetscContainer      container;
3974 
3975   PetscFunctionBegin;
3976   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3977   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3978 
3979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3981 
3982   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3983   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3984 
3985   bi     = merge->bi;
3986   bj     = merge->bj;
3987   buf_ri = merge->buf_ri;
3988   buf_rj = merge->buf_rj;
3989 
3990   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3991   owners = merge->rowmap->range;
3992   len_s  = merge->len_s;
3993 
3994   /* send and recv matrix values */
3995   /*-----------------------------*/
3996   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3997   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3998 
3999   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4000   for (proc=0,k=0; proc<size; proc++) {
4001     if (!len_s[proc]) continue;
4002     i    = owners[proc];
4003     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4004     k++;
4005   }
4006 
4007   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4008   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4009   ierr = PetscFree(status);CHKERRQ(ierr);
4010 
4011   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4012   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4013 
4014   /* insert mat values of mpimat */
4015   /*----------------------------*/
4016   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4017   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4018 
4019   for (k=0; k<merge->nrecv; k++) {
4020     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4021     nrows       = *(buf_ri_k[k]);
4022     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4023     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4024   }
4025 
4026   /* set values of ba */
4027   m = merge->rowmap->n;
4028   for (i=0; i<m; i++) {
4029     arow = owners[rank] + i;
4030     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4031     bnzi = bi[i+1] - bi[i];
4032     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4033 
4034     /* add local non-zero vals of this proc's seqmat into ba */
4035     anzi   = ai[arow+1] - ai[arow];
4036     aj     = a->j + ai[arow];
4037     aa     = a->a + ai[arow];
4038     nextaj = 0;
4039     for (j=0; nextaj<anzi; j++) {
4040       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4041         ba_i[j] += aa[nextaj++];
4042       }
4043     }
4044 
4045     /* add received vals into ba */
4046     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4047       /* i-th row */
4048       if (i == *nextrow[k]) {
4049         anzi   = *(nextai[k]+1) - *nextai[k];
4050         aj     = buf_rj[k] + *(nextai[k]);
4051         aa     = abuf_r[k] + *(nextai[k]);
4052         nextaj = 0;
4053         for (j=0; nextaj<anzi; j++) {
4054           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4055             ba_i[j] += aa[nextaj++];
4056           }
4057         }
4058         nextrow[k]++; nextai[k]++;
4059       }
4060     }
4061     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4062   }
4063   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4064   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4065 
4066   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4067   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4068   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4069   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4070   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4071   PetscFunctionReturn(0);
4072 }
4073 
4074 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4075 
4076 #undef __FUNCT__
4077 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4078 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4079 {
4080   PetscErrorCode      ierr;
4081   Mat                 B_mpi;
4082   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4083   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4084   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4085   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4086   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4087   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4088   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4089   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4090   MPI_Status          *status;
4091   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4092   PetscBT             lnkbt;
4093   Mat_Merge_SeqsToMPI *merge;
4094   PetscContainer      container;
4095 
4096   PetscFunctionBegin;
4097   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4098 
4099   /* make sure it is a PETSc comm */
4100   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4102   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4103 
4104   ierr = PetscNew(&merge);CHKERRQ(ierr);
4105   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4106 
4107   /* determine row ownership */
4108   /*---------------------------------------------------------*/
4109   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4110   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4111   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4112   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4113   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4114   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4115   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4116 
4117   m      = merge->rowmap->n;
4118   owners = merge->rowmap->range;
4119 
4120   /* determine the number of messages to send, their lengths */
4121   /*---------------------------------------------------------*/
4122   len_s = merge->len_s;
4123 
4124   len          = 0; /* length of buf_si[] */
4125   merge->nsend = 0;
4126   for (proc=0; proc<size; proc++) {
4127     len_si[proc] = 0;
4128     if (proc == rank) {
4129       len_s[proc] = 0;
4130     } else {
4131       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4132       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4133     }
4134     if (len_s[proc]) {
4135       merge->nsend++;
4136       nrows = 0;
4137       for (i=owners[proc]; i<owners[proc+1]; i++) {
4138         if (ai[i+1] > ai[i]) nrows++;
4139       }
4140       len_si[proc] = 2*(nrows+1);
4141       len         += len_si[proc];
4142     }
4143   }
4144 
4145   /* determine the number and length of messages to receive for ij-structure */
4146   /*-------------------------------------------------------------------------*/
4147   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4148   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4149 
4150   /* post the Irecv of j-structure */
4151   /*-------------------------------*/
4152   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4153   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4154 
4155   /* post the Isend of j-structure */
4156   /*--------------------------------*/
4157   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4158 
4159   for (proc=0, k=0; proc<size; proc++) {
4160     if (!len_s[proc]) continue;
4161     i    = owners[proc];
4162     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4163     k++;
4164   }
4165 
4166   /* receives and sends of j-structure are complete */
4167   /*------------------------------------------------*/
4168   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4169   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4170 
4171   /* send and recv i-structure */
4172   /*---------------------------*/
4173   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4174   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4175 
4176   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4177   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4178   for (proc=0,k=0; proc<size; proc++) {
4179     if (!len_s[proc]) continue;
4180     /* form outgoing message for i-structure:
4181          buf_si[0]:                 nrows to be sent
4182                [1:nrows]:           row index (global)
4183                [nrows+1:2*nrows+1]: i-structure index
4184     */
4185     /*-------------------------------------------*/
4186     nrows       = len_si[proc]/2 - 1;
4187     buf_si_i    = buf_si + nrows+1;
4188     buf_si[0]   = nrows;
4189     buf_si_i[0] = 0;
4190     nrows       = 0;
4191     for (i=owners[proc]; i<owners[proc+1]; i++) {
4192       anzi = ai[i+1] - ai[i];
4193       if (anzi) {
4194         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4195         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4196         nrows++;
4197       }
4198     }
4199     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4200     k++;
4201     buf_si += len_si[proc];
4202   }
4203 
4204   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4205   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4206 
4207   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4208   for (i=0; i<merge->nrecv; i++) {
4209     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4210   }
4211 
4212   ierr = PetscFree(len_si);CHKERRQ(ierr);
4213   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4214   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4215   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4216   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4217   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4218   ierr = PetscFree(status);CHKERRQ(ierr);
4219 
4220   /* compute a local seq matrix in each processor */
4221   /*----------------------------------------------*/
4222   /* allocate bi array and free space for accumulating nonzero column info */
4223   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4224   bi[0] = 0;
4225 
4226   /* create and initialize a linked list */
4227   nlnk = N+1;
4228   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4229 
4230   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4231   len  = ai[owners[rank+1]] - ai[owners[rank]];
4232   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4233 
4234   current_space = free_space;
4235 
4236   /* determine symbolic info for each local row */
4237   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4238 
4239   for (k=0; k<merge->nrecv; k++) {
4240     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4241     nrows       = *buf_ri_k[k];
4242     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4243     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4244   }
4245 
4246   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4247   len  = 0;
4248   for (i=0; i<m; i++) {
4249     bnzi = 0;
4250     /* add local non-zero cols of this proc's seqmat into lnk */
4251     arow  = owners[rank] + i;
4252     anzi  = ai[arow+1] - ai[arow];
4253     aj    = a->j + ai[arow];
4254     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4255     bnzi += nlnk;
4256     /* add received col data into lnk */
4257     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4258       if (i == *nextrow[k]) { /* i-th row */
4259         anzi  = *(nextai[k]+1) - *nextai[k];
4260         aj    = buf_rj[k] + *nextai[k];
4261         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4262         bnzi += nlnk;
4263         nextrow[k]++; nextai[k]++;
4264       }
4265     }
4266     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4267 
4268     /* if free space is not available, make more free space */
4269     if (current_space->local_remaining<bnzi) {
4270       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4271       nspacedouble++;
4272     }
4273     /* copy data into free space, then initialize lnk */
4274     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4275     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4276 
4277     current_space->array           += bnzi;
4278     current_space->local_used      += bnzi;
4279     current_space->local_remaining -= bnzi;
4280 
4281     bi[i+1] = bi[i] + bnzi;
4282   }
4283 
4284   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4285 
4286   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4287   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4288   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4289 
4290   /* create symbolic parallel matrix B_mpi */
4291   /*---------------------------------------*/
4292   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4293   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4294   if (n==PETSC_DECIDE) {
4295     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4296   } else {
4297     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4298   }
4299   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4300   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4301   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4302   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4303   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4304 
4305   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4306   B_mpi->assembled    = PETSC_FALSE;
4307   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4308   merge->bi           = bi;
4309   merge->bj           = bj;
4310   merge->buf_ri       = buf_ri;
4311   merge->buf_rj       = buf_rj;
4312   merge->coi          = NULL;
4313   merge->coj          = NULL;
4314   merge->owners_co    = NULL;
4315 
4316   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4317 
4318   /* attach the supporting struct to B_mpi for reuse */
4319   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4320   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4321   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4322   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4323   *mpimat = B_mpi;
4324 
4325   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4331 /*@C
4332       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4333                  matrices from each processor
4334 
4335     Collective on MPI_Comm
4336 
4337    Input Parameters:
4338 +    comm - the communicator the parallel matrix will live on
4339 .    seqmat - the input sequential matrix (one per process)
4340 .    m - number of local rows (or PETSC_DECIDE)
4341 .    n - number of local columns (or PETSC_DECIDE)
4342 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4343 
4344    Output Parameter:
4345 .    mpimat - the parallel matrix generated
4346 
4347     Level: advanced
4348 
4349    Notes:
4350      The dimensions of the sequential matrix in each processor MUST be the same.
4351      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4352      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4353 @*/
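/*
   Example usage (a minimal sketch; Aloc is assumed to be an assembled SeqAIJ matrix of the same
   dimensions on every process of comm):

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     -- ... refill Aloc with new values but the same nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/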
4354 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4355 {
4356   PetscErrorCode ierr;
4357   PetscMPIInt    size;
4358 
4359   PetscFunctionBegin;
4360   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4361   if (size == 1) {
4362     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4363     if (scall == MAT_INITIAL_MATRIX) {
4364       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4365     } else {
4366       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4367     }
4368     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4369     PetscFunctionReturn(0);
4370   }
4371   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4372   if (scall == MAT_INITIAL_MATRIX) {
4373     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4374   }
4375   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4376   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4382 /*@
4383      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4384           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4385           with MatGetSize()
4386 
4387     Not Collective
4388 
4389    Input Parameters:
4390 +    A - the matrix
4391 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4392 
4393    Output Parameter:
4394 .    A_loc - the local sequential matrix generated
4395 
4396     Level: developer
4397 
4398 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4399 
4400 @*/
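/*
   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix):

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     -- ... after changing the numerical values of A (same nonzero pattern) ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/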
4401 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4402 {
4403   PetscErrorCode ierr;
4404   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4405   Mat_SeqAIJ     *mat,*a,*b;
4406   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4407   MatScalar      *aa,*ba,*cam;
4408   PetscScalar    *ca;
4409   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4410   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4411   PetscBool      match;
4412   MPI_Comm       comm;
4413   PetscMPIInt    size;
4414 
4415   PetscFunctionBegin;
4416   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4417   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4418   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4420   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4421 
4422   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4423   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4424   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4425   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4426   aa = a->a; ba = b->a;
4427   if (scall == MAT_INITIAL_MATRIX) {
4428     if (size == 1) {
4429       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4430       PetscFunctionReturn(0);
4431     }
4432 
4433     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4434     ci[0] = 0;
4435     for (i=0; i<am; i++) {
4436       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4437     }
4438     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4439     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4440     k    = 0;
4441     for (i=0; i<am; i++) {
4442       ncols_o = bi[i+1] - bi[i];
4443       ncols_d = ai[i+1] - ai[i];
4444       /* off-diagonal portion of A */
4445       for (jo=0; jo<ncols_o; jo++) {
4446         col = cmap[*bj];
4447         if (col >= cstart) break;
4448         cj[k]   = col; bj++;
4449         ca[k++] = *ba++;
4450       }
4451       /* diagonal portion of A */
4452       for (j=0; j<ncols_d; j++) {
4453         cj[k]   = cstart + *aj++;
4454         ca[k++] = *aa++;
4455       }
4456       /* off-diagonal portion of A */
4457       for (j=jo; j<ncols_o; j++) {
4458         cj[k]   = cmap[*bj++];
4459         ca[k++] = *ba++;
4460       }
4461     }
4462     /* put together the new matrix */
4463     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4464     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4465     /* Since these are PETSc arrays, change flags to free them as necessary. */
4466     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4467     mat->free_a  = PETSC_TRUE;
4468     mat->free_ij = PETSC_TRUE;
4469     mat->nonew   = 0;
4470   } else if (scall == MAT_REUSE_MATRIX) {
4471     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4472     ci = mat->i; cj = mat->j; cam = mat->a;
4473     for (i=0; i<am; i++) {
4474       /* off-diagonal portion of A */
4475       ncols_o = bi[i+1] - bi[i];
4476       for (jo=0; jo<ncols_o; jo++) {
4477         col = cmap[*bj];
4478         if (col >= cstart) break;
4479         *cam++ = *ba++; bj++;
4480       }
4481       /* diagonal portion of A */
4482       ncols_d = ai[i+1] - ai[i];
4483       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4484       /* off-diagonal portion of A */
4485       for (j=jo; j<ncols_o; j++) {
4486         *cam++ = *ba++; bj++;
4487       }
4488     }
4489   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4490   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4491   PetscFunctionReturn(0);
4492 }
4493 
4494 #undef __FUNCT__
4495 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4496 /*@C
4497      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4498 
4499     Not Collective
4500 
4501    Input Parameters:
4502 +    A - the matrix
4503 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4504 -    row, col - index sets of rows and columns to extract (or NULL)
4505 
4506    Output Parameter:
4507 .    A_loc - the local sequential matrix generated
4508 
4509     Level: developer
4510 
4511 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4512 
4513 @*/
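/*
   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; passing NULL
   for row and col selects all local rows and all locally nonzero columns):

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/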
4514 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4515 {
4516   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4517   PetscErrorCode ierr;
4518   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4519   IS             isrowa,iscola;
4520   Mat            *aloc;
4521   PetscBool      match;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4525   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4526   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4527   if (!row) {
4528     start = A->rmap->rstart; end = A->rmap->rend;
4529     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4530   } else {
4531     isrowa = *row;
4532   }
4533   if (!col) {
4534     start = A->cmap->rstart;
4535     cmap  = a->garray;
4536     nzA   = a->A->cmap->n;
4537     nzB   = a->B->cmap->n;
4538     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4539     ncols = 0;
4540     for (i=0; i<nzB; i++) {
4541       if (cmap[i] < start) idx[ncols++] = cmap[i];
4542       else break;
4543     }
4544     imark = i;
4545     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4546     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4547     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4548   } else {
4549     iscola = *col;
4550   }
4551   if (scall != MAT_INITIAL_MATRIX) {
4552     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4553     aloc[0] = *A_loc;
4554   }
4555   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4556   *A_loc = aloc[0];
4557   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4558   if (!row) {
4559     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4560   }
4561   if (!col) {
4562     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4563   }
4564   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #undef __FUNCT__
4569 #define __FUNCT__ "MatGetBrowsOfAcols"
4570 /*@C
4571     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4572 
4573     Collective on Mat
4574 
4575    Input Parameters:
4576 +    A,B - the matrices in mpiaij format
4577 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4578 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4579 
4580    Output Parameters:
4581 +    rowb, colb - index sets of rows and columns of B to extract
4582 -    B_seq - the sequential matrix generated
4583 
4584     Level: developer
4585 
4586 @*/
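/*
   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices with
   compatible layouts; the index sets created for MAT_INITIAL_MATRIX are passed back unchanged
   for MAT_REUSE_MATRIX):

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     -- ... use Bseq, then later, with the same nonzero pattern of A and B ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/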
4587 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4588 {
4589   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4590   PetscErrorCode ierr;
4591   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4592   IS             isrowb,iscolb;
4593   Mat            *bseq=NULL;
4594 
4595   PetscFunctionBegin;
4596   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4597     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4598   }
4599   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4600 
4601   if (scall == MAT_INITIAL_MATRIX) {
4602     start = A->cmap->rstart;
4603     cmap  = a->garray;
4604     nzA   = a->A->cmap->n;
4605     nzB   = a->B->cmap->n;
4606     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4607     ncols = 0;
4608     for (i=0; i<nzB; i++) {  /* row < local row index */
4609       if (cmap[i] < start) idx[ncols++] = cmap[i];
4610       else break;
4611     }
4612     imark = i;
4613     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4614     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4615     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4616     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4617   } else {
4618     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4619     isrowb  = *rowb; iscolb = *colb;
4620     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4621     bseq[0] = *B_seq;
4622   }
4623   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4624   *B_seq = bseq[0];
4625   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4626   if (!rowb) {
4627     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4628   } else {
4629     *rowb = isrowb;
4630   }
4631   if (!colb) {
4632     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4633   } else {
4634     *colb = iscolb;
4635   }
4636   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4637   PetscFunctionReturn(0);
4638 }
4639 
4640 #undef __FUNCT__
4641 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4642 /*
4643     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4644     of the OFF-DIAGONAL portion of the local part of A
4645 
4646     Collective on Mat
4647 
4648    Input Parameters:
4649 +    A,B - the matrices in mpiaij format
4650 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4651 
4652    Output Parameters:
4653 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4654 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4655 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4656 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4657 
4658     Level: developer
4659 
4660 */
4661 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4662 {
4663   VecScatter_MPI_General *gen_to,*gen_from;
4664   PetscErrorCode         ierr;
4665   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4666   Mat_SeqAIJ             *b_oth;
4667   VecScatter             ctx =a->Mvctx;
4668   MPI_Comm               comm;
4669   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4670   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4671   PetscScalar            *rvalues,*svalues;
4672   MatScalar              *b_otha,*bufa,*bufA;
4673   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4674   MPI_Request            *rwaits = NULL,*swaits = NULL;
4675   MPI_Status             *sstatus,rstatus;
4676   PetscMPIInt            jj,size;
4677   PetscInt               *cols,sbs,rbs;
4678   PetscScalar            *vals;
4679 
4680   PetscFunctionBegin;
4681   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4683   if (size == 1) PetscFunctionReturn(0);
4684 
4685   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4686     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4687   }
4688   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4689   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4690 
4691   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4692   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4693   rvalues  = gen_from->values; /* holds the length of receiving row */
4694   svalues  = gen_to->values;   /* holds the length of sending row */
4695   nrecvs   = gen_from->n;
4696   nsends   = gen_to->n;
4697 
4698   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4699   srow    = gen_to->indices;    /* local row index to be sent */
4700   sstarts = gen_to->starts;
4701   sprocs  = gen_to->procs;
4702   sstatus = gen_to->sstatus;
4703   sbs     = gen_to->bs;
4704   rstarts = gen_from->starts;
4705   rprocs  = gen_from->procs;
4706   rbs     = gen_from->bs;
4707 
4708   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4709   if (scall == MAT_INITIAL_MATRIX) {
4710     /* i-array */
4711     /*---------*/
4712     /*  post receives */
4713     for (i=0; i<nrecvs; i++) {
4714       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4715       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4716       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4717     }
4718 
4719     /* pack the outgoing message */
4720     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4721 
4722     sstartsj[0] = 0;
4723     rstartsj[0] = 0;
4724     len         = 0; /* total length of j or a array to be sent */
4725     k           = 0;
4726     for (i=0; i<nsends; i++) {
4727       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4728       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4729       for (j=0; j<nrows; j++) {
4730         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4731         for (l=0; l<sbs; l++) {
4732           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4733 
4734           rowlen[j*sbs+l] = ncols;
4735 
4736           len += ncols;
4737           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4738         }
4739         k++;
4740       }
4741       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4742 
4743       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4744     }
4745     /* recvs and sends of i-array are completed */
4746     i = nrecvs;
4747     while (i--) {
4748       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4749     }
4750     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4751 
4752     /* allocate buffers for sending j and a arrays */
4753     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4754     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4755 
4756     /* create i-array of B_oth */
4757     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4758 
4759     b_othi[0] = 0;
4760     len       = 0; /* total length of j or a array to be received */
4761     k         = 0;
4762     for (i=0; i<nrecvs; i++) {
4763       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4764       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4765       for (j=0; j<nrows; j++) {
4766         b_othi[k+1] = b_othi[k] + rowlen[j];
4767         len        += rowlen[j]; k++;
4768       }
4769       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4770     }
4771 
4772     /* allocate space for j and a arrays of B_oth */
4773     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4774     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4775 
4776     /* j-array */
4777     /*---------*/
4778     /*  post receives of j-array */
4779     for (i=0; i<nrecvs; i++) {
4780       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4781       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4782     }
4783 
4784     /* pack the outgoing message j-array */
4785     k = 0;
4786     for (i=0; i<nsends; i++) {
4787       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4788       bufJ  = bufj+sstartsj[i];
4789       for (j=0; j<nrows; j++) {
4790         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4791         for (ll=0; ll<sbs; ll++) {
4792           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4793           for (l=0; l<ncols; l++) {
4794             *bufJ++ = cols[l];
4795           }
4796           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4797         }
4798       }
4799       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4800     }
4801 
4802     /* recvs and sends of j-array are completed */
4803     i = nrecvs;
4804     while (i--) {
4805       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4806     }
4807     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4808   } else if (scall == MAT_REUSE_MATRIX) {
4809     sstartsj = *startsj_s;
4810     rstartsj = *startsj_r;
4811     bufa     = *bufa_ptr;
4812     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4813     b_otha   = b_oth->a;
4814   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4815 
4816   /* a-array */
4817   /*---------*/
4818   /*  post receives of a-array */
4819   for (i=0; i<nrecvs; i++) {
4820     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4821     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4822   }
4823 
4824   /* pack the outgoing message a-array */
4825   k = 0;
4826   for (i=0; i<nsends; i++) {
4827     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4828     bufA  = bufa+sstartsj[i];
4829     for (j=0; j<nrows; j++) {
4830       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4831       for (ll=0; ll<sbs; ll++) {
4832         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4833         for (l=0; l<ncols; l++) {
4834           *bufA++ = vals[l];
4835         }
4836         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4837       }
4838     }
4839     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4840   }
4841   /* recvs and sends of a-array are completed */
4842   i = nrecvs;
4843   while (i--) {
4844     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4845   }
4846   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4847   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4848 
4849   if (scall == MAT_INITIAL_MATRIX) {
4850     /* put together the new matrix */
4851     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4852 
4853     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4854     /* Since these are PETSc arrays, change flags to free them as necessary. */
4855     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4856     b_oth->free_a  = PETSC_TRUE;
4857     b_oth->free_ij = PETSC_TRUE;
4858     b_oth->nonew   = 0;
4859 
4860     ierr = PetscFree(bufj);CHKERRQ(ierr);
4861     if (!startsj_s || !bufa_ptr) {
4862       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4863       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4864     } else {
4865       *startsj_s = sstartsj;
4866       *startsj_r = rstartsj;
4867       *bufa_ptr  = bufa;
4868     }
4869   }
4870   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 #undef __FUNCT__
4875 #define __FUNCT__ "MatGetCommunicationStructs"
4876 /*@C
4877   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4878 
4879   Not Collective
4880 
4881   Input Parameter:
4882 . A - The matrix in mpiaij format
4883 
4884   Output Parameters:
4885 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4886 . colmap - A map from global column index to local index into lvec
4887 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4888 
4889   Level: developer
4890 
4891 @*/
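/*
   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; the returned
   objects are references owned by A and must not be destroyed by the caller):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/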
4892 #if defined(PETSC_USE_CTABLE)
4893 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4894 #else
4895 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4896 #endif
4897 {
4898   Mat_MPIAIJ *a;
4899 
4900   PetscFunctionBegin;
4901   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4902   PetscValidPointer(lvec, 2);
4903   PetscValidPointer(colmap, 3);
4904   PetscValidPointer(multScatter, 4);
4905   a = (Mat_MPIAIJ*) A->data;
4906   if (lvec) *lvec = a->lvec;
4907   if (colmap) *colmap = a->colmap;
4908   if (multScatter) *multScatter = a->Mvctx;
4909   PetscFunctionReturn(0);
4910 }
4911 
4912 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4913 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4914 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4915 #if defined(PETSC_HAVE_ELEMENTAL)
4916 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4917 #endif
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4921 /*
4922     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product directly is untenable
4923 
4924                n                       p                          p
4925         (              )       (              )         (                  )
4926       m (      A       )  *  n (       B      )   =   m (         C        )
4927         (              )       (              )         (                  )
4928 
4929 */
4930 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4931 {
4932   PetscErrorCode ierr;
4933   Mat            At,Bt,Ct;
4934 
4935   PetscFunctionBegin;
4936   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4937   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4938   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4939   ierr = MatDestroy(&At);CHKERRQ(ierr);
4940   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4941   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4942   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4948 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4949 {
4950   PetscErrorCode ierr;
4951   PetscInt       m=A->rmap->n,n=B->cmap->n;
4952   Mat            Cmat;
4953 
4954   PetscFunctionBegin;
4955   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4956   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4957   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4958   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4959   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4960   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4961   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4962   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4963 
4964   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4965 
4966   *C = Cmat;
4967   PetscFunctionReturn(0);
4968 }
4969 
4970 /* ----------------------------------------------------------------*/
4971 #undef __FUNCT__
4972 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4973 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4974 {
4975   PetscErrorCode ierr;
4976 
4977   PetscFunctionBegin;
4978   if (scall == MAT_INITIAL_MATRIX) {
4979     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4980     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4981     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4982   }
4983   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4984   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4985   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /*MC
4990    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4991 
4992    Options Database Keys:
4993 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4994 
4995   Level: beginner
4996 
4997 .seealso: MatCreateAIJ()
4998 M*/
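/*
   Example usage (a minimal sketch of selecting this type in code; the same effect is obtained at
   runtime with -mat_type mpiaij and MatSetFromOptions()):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/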
4999 
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatCreate_MPIAIJ"
5002 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5003 {
5004   Mat_MPIAIJ     *b;
5005   PetscErrorCode ierr;
5006   PetscMPIInt    size;
5007 
5008   PetscFunctionBegin;
5009   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5010 
5011   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5012   B->data       = (void*)b;
5013   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5014   B->assembled  = PETSC_FALSE;
5015   B->insertmode = NOT_SET_VALUES;
5016   b->size       = size;
5017 
5018   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5019 
5020   /* build cache for off array entries formed */
5021   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5022 
5023   b->donotstash  = PETSC_FALSE;
5024   b->colmap      = 0;
5025   b->garray      = 0;
5026   b->roworiented = PETSC_TRUE;
5027 
5028   /* stuff used for matrix vector multiply */
5029   b->lvec  = NULL;
5030   b->Mvctx = NULL;
5031 
5032   /* stuff for MatGetRow() */
5033   b->rowindices   = 0;
5034   b->rowvalues    = 0;
5035   b->getrowactive = PETSC_FALSE;
5036 
5037   /* flexible pointer used in CUSP/CUSPARSE classes */
5038   b->spptr = NULL;
5039 
5040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5050 #if defined(PETSC_HAVE_ELEMENTAL)
5051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5052 #endif
5053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5056   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5057   PetscFunctionReturn(0);
5058 }
5059 
5060 #undef __FUNCT__
5061 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5062 /*@C
5063      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5064          and "off-diagonal" part of the matrix in CSR format.
5065 
5066    Collective on MPI_Comm
5067 
5068    Input Parameters:
5069 +  comm - MPI communicator
5070 .  m - number of local rows (Cannot be PETSC_DECIDE)
5071 .  n - This value should be the same as the local size used in creating the
5072        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5073        calculated if N is given) For square matrices n is almost always m.
5074 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5075 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5076 .   i - row indices for "diagonal" portion of matrix
5077 .   j - column indices
5078 .   a - matrix values
5079 .   oi - row indices for "off-diagonal" portion of matrix
5080 .   oj - column indices
5081 -   oa - matrix values
5082 
5083    Output Parameter:
5084 .   mat - the matrix
5085 
5086    Level: advanced
5087 
5088    Notes:
5089        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5090        must free the arrays once the matrix has been destroyed and not before.
5091 
5092        The i and j indices are 0 based
5093 
5094        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5095 
5096        This sets local rows and cannot be used to set off-processor values.
5097 
5098        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5099        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5100        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) instead:
5101        the resulting assembly is easier to implement, works with any matrix format, and does not require the user to
5102        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5103        communication if it is known that only local entries will be set.
5104 
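   Example usage:
       A minimal, illustrative sketch (the names comm, m, n, ia, ja, va, oia, oja, and ova are placeholders for
       values and CSR arrays the application has already built; see MatCreateAIJ() for how the two blocks are split):

.vb
       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,oia,oja,ova,&A);CHKERRQ(ierr);
       /* ... use A ... */
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       /* the six CSR arrays may be freed only now, after the matrix has been destroyed */
.ve
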
5105 .keywords: matrix, aij, compressed row, sparse, parallel
5106 
5107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5108           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5109 @*/
5110 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5111 {
5112   PetscErrorCode ierr;
5113   Mat_MPIAIJ     *maij;
5114 
5115   PetscFunctionBegin;
5116   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5117   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5118   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5119   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5120   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5121   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5122   maij = (Mat_MPIAIJ*) (*mat)->data;
5123 
5124   (*mat)->preallocated = PETSC_TRUE;
5125 
5126   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5127   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5128 
5129   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5130   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
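  /* note: the off-diagonal block B is created with the full global column width here; its columns are
     compacted to only the needed ones when the MPI matrix is assembled below */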
5131 
5132   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5133   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5134   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5135   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5136 
5137   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5138   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5139   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5140   PetscFunctionReturn(0);
5141 }
5142 
5143 /*
5144     Special version for direct calls from Fortran
5145 */
5146 #include <petsc-private/fortranimpl.h>
5147 
5148 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5149 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5150 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5151 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5152 #endif
5153 
5154 /* Change these macros so that they can be used in a void function */
5155 #undef CHKERRQ
5156 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5157 #undef SETERRQ2
5158 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5159 #undef SETERRQ3
5160 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5161 #undef SETERRQ
5162 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
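/* The Fortran-callable routine below returns void, so it cannot propagate a PetscErrorCode to its caller;
   with these redefinitions any error aborts via CHKERRABORT() instead of being returned */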
5163 
5164 #undef __FUNCT__
5165 #define __FUNCT__ "matsetvaluesmpiaij_"
5166 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5167 {
5168   Mat            mat  = *mmat;
5169   PetscInt       m    = *mm, n = *mn;
5170   InsertMode     addv = *maddv;
5171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5172   PetscScalar    value;
5173   PetscErrorCode ierr;
5174 
5175   MatCheckPreallocated(mat,1);
5176   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5177 
5178 #if defined(PETSC_USE_DEBUG)
5179   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5180 #endif
5181   {
5182     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5183     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5184     PetscBool roworiented = aij->roworiented;
5185 
5186     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5187     Mat        A                 = aij->A;
5188     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5189     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5190     MatScalar  *aa               = a->a;
5191     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5192     Mat        B                 = aij->B;
5193     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5194     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5195     MatScalar  *ba               = b->a;
5196 
5197     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5198     PetscInt  nonew = a->nonew;
5199     MatScalar *ap1,*ap2;
5200 
5201     PetscFunctionBegin;
5202     for (i=0; i<m; i++) {
5203       if (im[i] < 0) continue;
5204 #if defined(PETSC_USE_DEBUG)
5205       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5206 #endif
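      /* rows owned by this process are inserted directly into the local diagonal (A) and off-diagonal (B) blocks */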
5207       if (im[i] >= rstart && im[i] < rend) {
5208         row      = im[i] - rstart;
5209         lastcol1 = -1;
5210         rp1      = aj + ai[row];
5211         ap1      = aa + ai[row];
5212         rmax1    = aimax[row];
5213         nrow1    = ailen[row];
5214         low1     = 0;
5215         high1    = nrow1;
5216         lastcol2 = -1;
5217         rp2      = bj + bi[row];
5218         ap2      = ba + bi[row];
5219         rmax2    = bimax[row];
5220         nrow2    = bilen[row];
5221         low2     = 0;
5222         high2    = nrow2;
5223 
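        /* v is traversed row-major (v[i*n+j]) or column-major (v[i+j*m]) depending on roworiented */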
5224         for (j=0; j<n; j++) {
5225           if (roworiented) value = v[i*n+j];
5226           else value = v[i+j*m];
5227           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
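          /* global columns in [cstart,cend) belong to the diagonal block A and use local column numbering */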
5228           if (in[j] >= cstart && in[j] < cend) {
5229             col = in[j] - cstart;
5230             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5231           } else if (in[j] < 0) continue;
5232 #if defined(PETSC_USE_DEBUG)
5233           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5234 #endif
5235           else {
5236             if (mat->was_assembled) {
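              /* once the matrix has been assembled, B stores a compacted set of columns; translate the
                 global column index through colmap (built on demand) */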
5237               if (!aij->colmap) {
5238                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5239               }
5240 #if defined(PETSC_USE_CTABLE)
5241               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5242               col--;
5243 #else
5244               col = aij->colmap[in[j]] - 1;
5245 #endif
5246               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5247                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5248                 col  =  in[j];
5249                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5250                 B     = aij->B;
5251                 b     = (Mat_SeqAIJ*)B->data;
5252                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5253                 ba    = b->a;
5254                 rp2   = bj + bi[row];
5255                 ap2   = ba + bi[row];
5256                 rmax2 = bimax[row];
5257                 nrow2 = bilen[row];
5258                 low2  = 0;
5259                 high2 = nrow2;
5260                 bm    = aij->B->rmap->n;
5261               }
5262             } else col = in[j];
5263             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5264           }
5265         }
5266       } else if (!aij->donotstash) {
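        /* rows owned by other processes are stashed here and communicated during MatAssemblyBegin/End() */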
5267         if (roworiented) {
5268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5269         } else {
5270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5271         }
5272       }
5273     }
5274   }
5275   PetscFunctionReturnVoid();
5276 }
5277 
5278