xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a593226baf5c94d3e4393e03cae3fa59cb94e469)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array but is fast to acess.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of off process part of matrix zeroing removed columns*/
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added in yy until the next line, */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1157 #endif
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1166   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1167   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1168   PetscErrorCode ierr;
1169   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1170   int            fd;
1171   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1172   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1173   PetscScalar    *column_values;
1174   PetscInt       message_count,flowcontrolcount;
1175   FILE           *file;
1176 
1177   PetscFunctionBegin;
1178   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1180   nz   = A->nz + B->nz;
1181   if (!rank) {
1182     header[0] = MAT_FILE_CLASSID;
1183     header[1] = mat->rmap->N;
1184     header[2] = mat->cmap->N;
1185 
1186     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1187     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1188     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     /* get largest number of rows any processor has */
1190     rlen  = mat->rmap->n;
1191     range = mat->rmap->range;
1192     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1193   } else {
1194     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195     rlen = mat->rmap->n;
1196   }
1197 
1198   /* load up the local row counts */
1199   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1200   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1201 
1202   /* store the row lengths to the file */
1203   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1204   if (!rank) {
1205     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     for (i=1; i<size; i++) {
1207       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1208       rlen = range[i+1] - range[i];
1209       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1217   }
1218   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1219 
1220   /* load up the local column indices */
1221   nzmax = nz; /* th processor needs space a largest processor needs */
1222   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1224   cnt   = 0;
1225   for (i=0; i<mat->rmap->n; i++) {
1226     for (j=B->i[i]; j<B->i[i+1]; j++) {
1227       if ((col = garray[B->j[j]]) > cstart) break;
1228       column_indices[cnt++] = col;
1229     }
1230     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1231     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1232   }
1233   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1234 
1235   /* store the column indices to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     MPI_Status status;
1239     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     for (i=1; i<size; i++) {
1241       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1242       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1243       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1244       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1255 
1256   /* load up the local column values */
1257   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1258   cnt  = 0;
1259   for (i=0; i<mat->rmap->n; i++) {
1260     for (j=B->i[i]; j<B->i[i+1]; j++) {
1261       if (garray[B->j[j]] > cstart) break;
1262       column_values[cnt++] = B->a[j];
1263     }
1264     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1265     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1266   }
1267   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1268 
1269   /* store the column values to the file */
1270   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1271   if (!rank) {
1272     MPI_Status status;
1273     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1274     for (i=1; i<size; i++) {
1275       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1276       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1277       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1278       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1280     }
1281     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1282   } else {
1283     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1284     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1287   }
1288   ierr = PetscFree(column_values);CHKERRQ(ierr);
1289 
1290   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1291   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #include <petscdraw.h>
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1299 {
1300   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1301   PetscErrorCode    ierr;
1302   PetscMPIInt       rank = aij->rank,size = aij->size;
1303   PetscBool         isdraw,iascii,isbinary;
1304   PetscViewer       sviewer;
1305   PetscViewerFormat format;
1306 
1307   PetscFunctionBegin;
1308   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1309   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1311   if (iascii) {
1312     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1313     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1314       MatInfo   info;
1315       PetscBool inodes;
1316 
1317       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1321       if (!inodes) {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1327       }
1328       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1335       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1338       PetscInt inodecount,inodelimit,*inodes;
1339       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1340       if (inodes) {
1341         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1344       }
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1347       PetscFunctionReturn(0);
1348     }
1349   } else if (isbinary) {
1350     if (size == 1) {
1351       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1355     }
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1362   }
1363 
1364   {
1365     /* assemble the entire matrix onto first processor. */
1366     Mat        A;
1367     Mat_SeqAIJ *Aloc;
1368     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1369     MatScalar  *a;
1370 
1371     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1372     if (!rank) {
1373       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1374     } else {
1375       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1376     }
1377     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1378     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1379     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1380     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1381     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1382 
1383     /* copy over the A part */
1384     Aloc = (Mat_SeqAIJ*)aij->A->data;
1385     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1388     for (i=0; i<m; i++) {
1389       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1390       row++;
1391       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1392     }
1393     aj = Aloc->j;
1394     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1395 
1396     /* copy over the B part */
1397     Aloc = (Mat_SeqAIJ*)aij->B->data;
1398     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1399     row  = mat->rmap->rstart;
1400     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1401     ct   = cols;
1402     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1403     for (i=0; i<m; i++) {
1404       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1405       row++;
1406       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1407     }
1408     ierr = PetscFree(ct);CHKERRQ(ierr);
1409     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     /*
1412        Everyone has to call to draw the matrix since the graphics waits are
1413        synchronized across all processors that share the PetscDraw object
1414     */
1415     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1416     if (!rank) {
1417       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1420     ierr = MatDestroy(&A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatView_MPIAIJ"
1427 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1428 {
1429   PetscErrorCode ierr;
1430   PetscBool      iascii,isdraw,issocket,isbinary;
1431 
1432   PetscFunctionBegin;
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1437   if (iascii || isdraw || isbinary || issocket) {
1438     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1439   }
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSOR_MPIAIJ"
1445 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1446 {
1447   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1448   PetscErrorCode ierr;
1449   Vec            bb1 = 0;
1450   PetscBool      hasop;
1451 
1452   PetscFunctionBegin;
1453   if (flag == SOR_APPLY_UPPER) {
1454     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455     PetscFunctionReturn(0);
1456   }
1457 
1458   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1459     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1460   }
1461 
1462   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1463     if (flag & SOR_ZERO_INITIAL_GUESS) {
1464       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1465       its--;
1466     }
1467 
1468     while (its--) {
1469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471 
1472       /* update rhs: bb1 = bb - B*x */
1473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1475 
1476       /* local sweep */
1477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1478     }
1479   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1480     if (flag & SOR_ZERO_INITIAL_GUESS) {
1481       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482       its--;
1483     }
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_EISENSTAT) {
1512     Vec xx1;
1513 
1514     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1515     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1516 
1517     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519     if (!mat->diag) {
1520       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1521       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1522     }
1523     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1524     if (hasop) {
1525       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1526     } else {
1527       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1528     }
1529     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1530 
1531     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1532 
1533     /* local sweep */
1534     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1535     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1536     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1537   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1538 
1539   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatPermute_MPIAIJ"
1545 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1546 {
1547   Mat            aA,aB,Aperm;
1548   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1549   PetscScalar    *aa,*ba;
1550   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1551   PetscSF        rowsf,sf;
1552   IS             parcolp = NULL;
1553   PetscBool      done;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1558   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1559   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1560   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1561 
1562   /* Invert row permutation to find out where my rows should go */
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1566   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1567   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1569 
1570   /* Invert column permutation to find out where my columns should go */
1571   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1572   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1573   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1574   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1575   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1576   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1578 
1579   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1580   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1581   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1582 
1583   /* Find out where my gcols should go */
1584   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1585   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1587   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1588   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1589   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1592 
1593   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1594   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1595   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1596   for (i=0; i<m; i++) {
1597     PetscInt row = rdest[i],rowner;
1598     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       PetscInt cowner,col = cdest[aj[j]];
1601       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1602       if (rowner == cowner) dnnz[i]++;
1603       else onnz[i]++;
1604     }
1605     for (j=bi[i]; j<bi[i+1]; j++) {
1606       PetscInt cowner,col = gcdest[bj[j]];
1607       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1608       if (rowner == cowner) dnnz[i]++;
1609       else onnz[i]++;
1610     }
1611   }
1612   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1616   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1617 
1618   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1619   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1620   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1621   for (i=0; i<m; i++) {
1622     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1623     PetscInt j0,rowlen;
1624     rowlen = ai[i+1] - ai[i];
1625     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1626       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1627       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1628     }
1629     rowlen = bi[i+1] - bi[i];
1630     for (j0=j=0; j<rowlen; j0=j) {
1631       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1632       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1633     }
1634   }
1635   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1636   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1637   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1638   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1639   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1640   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1641   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1643   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1644   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1645   *B = Aperm;
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1651 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1652 {
1653   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1654   Mat            A    = mat->A,B = mat->B;
1655   PetscErrorCode ierr;
1656   PetscReal      isend[5],irecv[5];
1657 
1658   PetscFunctionBegin;
1659   info->block_size = 1.0;
1660   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1661 
1662   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1663   isend[3] = info->memory;  isend[4] = info->mallocs;
1664 
1665   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1666 
1667   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1668   isend[3] += info->memory;  isend[4] += info->mallocs;
1669   if (flag == MAT_LOCAL) {
1670     info->nz_used      = isend[0];
1671     info->nz_allocated = isend[1];
1672     info->nz_unneeded  = isend[2];
1673     info->memory       = isend[3];
1674     info->mallocs      = isend[4];
1675   } else if (flag == MAT_GLOBAL_MAX) {
1676     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1677 
1678     info->nz_used      = irecv[0];
1679     info->nz_allocated = irecv[1];
1680     info->nz_unneeded  = irecv[2];
1681     info->memory       = irecv[3];
1682     info->mallocs      = irecv[4];
1683   } else if (flag == MAT_GLOBAL_SUM) {
1684     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1685 
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   }
1692   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1693   info->fill_ratio_needed = 0;
1694   info->factor_mallocs    = 0;
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatSetOption_MPIAIJ"
1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1701 {
1702   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   switch (op) {
1707   case MAT_NEW_NONZERO_LOCATIONS:
1708   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1709   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1710   case MAT_KEEP_NONZERO_PATTERN:
1711   case MAT_NEW_NONZERO_LOCATION_ERR:
1712   case MAT_USE_INODES:
1713   case MAT_IGNORE_ZERO_ENTRIES:
1714     MatCheckPreallocated(A,1);
1715     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1716     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_ROW_ORIENTED:
1719     a->roworiented = flg;
1720 
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_NEW_DIAGONALS:
1725     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   case MAT_SPD:
1731     A->spd_set = PETSC_TRUE;
1732     A->spd     = flg;
1733     if (flg) {
1734       A->symmetric                  = PETSC_TRUE;
1735       A->structurally_symmetric     = PETSC_TRUE;
1736       A->symmetric_set              = PETSC_TRUE;
1737       A->structurally_symmetric_set = PETSC_TRUE;
1738     }
1739     break;
1740   case MAT_SYMMETRIC:
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_STRUCTURALLY_SYMMETRIC:
1744     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1745     break;
1746   case MAT_HERMITIAN:
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_SYMMETRY_ETERNAL:
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   default:
1753     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatGetRow_MPIAIJ"
1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1761 {
1762   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1763   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1764   PetscErrorCode ierr;
1765   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1766   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1767   PetscInt       *cmap,*idx_p;
1768 
1769   PetscFunctionBegin;
1770   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1771   mat->getrowactive = PETSC_TRUE;
1772 
1773   if (!mat->rowvalues && (idx || v)) {
1774     /*
1775         allocate enough space to hold information from the longest row.
1776     */
1777     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1778     PetscInt   max = 1,tmp;
1779     for (i=0; i<matin->rmap->n; i++) {
1780       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1781       if (max < tmp) max = tmp;
1782     }
1783     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1784   }
1785 
1786   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1787   lrow = row - rstart;
1788 
1789   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1790   if (!v)   {pvA = 0; pvB = 0;}
1791   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1792   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1793   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1794   nztot = nzA + nzB;
1795 
1796   cmap = mat->garray;
1797   if (v  || idx) {
1798     if (nztot) {
1799       /* Sort by increasing column numbers, assuming A and B already sorted */
1800       PetscInt imark = -1;
1801       if (v) {
1802         *v = v_p = mat->rowvalues;
1803         for (i=0; i<nzB; i++) {
1804           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1805           else break;
1806         }
1807         imark = i;
1808         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1809         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1810       }
1811       if (idx) {
1812         *idx = idx_p = mat->rowindices;
1813         if (imark > -1) {
1814           for (i=0; i<imark; i++) {
1815             idx_p[i] = cmap[cworkB[i]];
1816           }
1817         } else {
1818           for (i=0; i<nzB; i++) {
1819             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1820             else break;
1821           }
1822           imark = i;
1823         }
1824         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1825         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1826       }
1827     } else {
1828       if (idx) *idx = 0;
1829       if (v)   *v   = 0;
1830     }
1831   }
1832   *nz  = nztot;
1833   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1834   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1840 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1841 {
1842   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1843 
1844   PetscFunctionBegin;
1845   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1846   aij->getrowactive = PETSC_FALSE;
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatNorm_MPIAIJ"
1852 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1853 {
1854   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1855   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1856   PetscErrorCode ierr;
1857   PetscInt       i,j,cstart = mat->cmap->rstart;
1858   PetscReal      sum = 0.0;
1859   MatScalar      *v;
1860 
1861   PetscFunctionBegin;
1862   if (aij->size == 1) {
1863     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1864   } else {
1865     if (type == NORM_FROBENIUS) {
1866       v = amat->a;
1867       for (i=0; i<amat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       v = bmat->a;
1871       for (i=0; i<bmat->nz; i++) {
1872         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1873       }
1874       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1875       *norm = PetscSqrtReal(*norm);
1876     } else if (type == NORM_1) { /* max column norm */
1877       PetscReal *tmp,*tmp2;
1878       PetscInt  *jj,*garray = aij->garray;
1879       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1880       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1881       *norm = 0.0;
1882       v     = amat->a; jj = amat->j;
1883       for (j=0; j<amat->nz; j++) {
1884         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1885       }
1886       v = bmat->a; jj = bmat->j;
1887       for (j=0; j<bmat->nz; j++) {
1888         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1889       }
1890       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       for (j=0; j<mat->cmap->N; j++) {
1892         if (tmp2[j] > *norm) *norm = tmp2[j];
1893       }
1894       ierr = PetscFree(tmp);CHKERRQ(ierr);
1895       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1896     } else if (type == NORM_INFINITY) { /* max row norm */
1897       PetscReal ntemp = 0.0;
1898       for (j=0; j<aij->A->rmap->n; j++) {
1899         v   = amat->a + amat->i[j];
1900         sum = 0.0;
1901         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1902           sum += PetscAbsScalar(*v); v++;
1903         }
1904         v = bmat->a + bmat->i[j];
1905         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1906           sum += PetscAbsScalar(*v); v++;
1907         }
1908         if (sum > ntemp) ntemp = sum;
1909       }
1910       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1912   }
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatTranspose_MPIAIJ"
1918 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1919 {
1920   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1921   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1922   PetscErrorCode ierr;
1923   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1924   PetscInt       cstart = A->cmap->rstart,ncol;
1925   Mat            B;
1926   MatScalar      *array;
1927 
1928   PetscFunctionBegin;
1929   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1930 
1931   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1932   ai = Aloc->i; aj = Aloc->j;
1933   bi = Bloc->i; bj = Bloc->j;
1934   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1935     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1936     PetscSFNode          *oloc;
1937     PETSC_UNUSED PetscSF sf;
1938 
1939     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1940     /* compute d_nnz for preallocation */
1941     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1942     for (i=0; i<ai[ma]; i++) {
1943       d_nnz[aj[i]]++;
1944       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1945     }
1946     /* compute local off-diagonal contributions */
1947     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1948     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1949     /* map those to global */
1950     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1951     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1952     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1953     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1954     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1955     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1956     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1957 
1958     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1959     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1960     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1962     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1963     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1964   } else {
1965     B    = *matout;
1966     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1968   }
1969 
1970   /* copy over the A part */
1971   array = Aloc->a;
1972   row   = A->rmap->rstart;
1973   for (i=0; i<ma; i++) {
1974     ncol = ai[i+1]-ai[i];
1975     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1976     row++;
1977     array += ncol; aj += ncol;
1978   }
1979   aj = Aloc->j;
1980   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1981 
1982   /* copy over the B part */
1983   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1984   array = Bloc->a;
1985   row   = A->rmap->rstart;
1986   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1987   cols_tmp = cols;
1988   for (i=0; i<mb; i++) {
1989     ncol = bi[i+1]-bi[i];
1990     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1991     row++;
1992     array += ncol; cols_tmp += ncol;
1993   }
1994   ierr = PetscFree(cols);CHKERRQ(ierr);
1995 
1996   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1999     *matout = B;
2000   } else {
2001     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2002   }
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2008 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2009 {
2010   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2011   Mat            a    = aij->A,b = aij->B;
2012   PetscErrorCode ierr;
2013   PetscInt       s1,s2,s3;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2017   if (rr) {
2018     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2019     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2020     /* Overlap communication with computation. */
2021     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022   }
2023   if (ll) {
2024     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2025     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2026     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2027   }
2028   /* scale  the diagonal block */
2029   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2030 
2031   if (rr) {
2032     /* Do a scatter end and then right scale the off-diagonal block */
2033     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2034     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2041 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2042 {
2043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatEqual_MPIAIJ"
2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2054 {
2055   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2056   Mat            a,b,c,d;
2057   PetscBool      flg;
2058   PetscErrorCode ierr;
2059 
2060   PetscFunctionBegin;
2061   a = matA->A; b = matA->B;
2062   c = matB->A; d = matB->B;
2063 
2064   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2065   if (flg) {
2066     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2067   }
2068   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatCopy_MPIAIJ"
2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2075 {
2076   PetscErrorCode ierr;
2077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2078   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2079 
2080   PetscFunctionBegin;
2081   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2082   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2083     /* because of the column compression in the off-processor part of the matrix a->B,
2084        the number of columns in a->B and b->B may be different, hence we cannot call
2085        the MatCopy() directly on the two parts. If need be, we can provide a more
2086        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2087        then copying the submatrices */
2088     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2089   } else {
2090     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2091     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatSetUp_MPIAIJ"
2098 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2099 {
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /*
2108    Computes the number of nonzeros per row needed for preallocation when X and Y
2109    have different nonzero structure.
2110 */
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2113 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2114 {
2115   PetscInt       i,j,k,nzx,nzy;
2116 
2117   PetscFunctionBegin;
2118   /* Set the number of nonzeros in the new matrix */
2119   for (i=0; i<m; i++) {
2120     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2121     nzx = xi[i+1] - xi[i];
2122     nzy = yi[i+1] - yi[i];
2123     nnz[i] = 0;
2124     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2125       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2126       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2127       nnz[i]++;
2128     }
2129     for (; k<nzy; k++) nnz[i]++;
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2138 {
2139   PetscErrorCode ierr;
2140   PetscInt       m = Y->rmap->N;
2141   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2142   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatAXPY_MPIAIJ"
2151 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   PetscInt       i;
2155   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2156   PetscBLASInt   bnz,one=1;
2157   Mat_SeqAIJ     *x,*y;
2158 
2159   PetscFunctionBegin;
2160   if (str == SAME_NONZERO_PATTERN) {
2161     PetscScalar alpha = a;
2162     x    = (Mat_SeqAIJ*)xx->A->data;
2163     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2164     y    = (Mat_SeqAIJ*)yy->A->data;
2165     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2166     x    = (Mat_SeqAIJ*)xx->B->data;
2167     y    = (Mat_SeqAIJ*)yy->B->data;
2168     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2169     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2170     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2171   } else if (str == SUBSET_NONZERO_PATTERN) {
2172     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2173 
2174     x = (Mat_SeqAIJ*)xx->B->data;
2175     y = (Mat_SeqAIJ*)yy->B->data;
2176     if (y->xtoy && y->XtoY != xx->B) {
2177       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2178       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2179     }
2180     if (!y->xtoy) { /* get xtoy */
2181       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2182       y->XtoY = xx->B;
2183       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2184     }
2185     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2186     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2187   } else {
2188     Mat      B;
2189     PetscInt *nnz_d,*nnz_o;
2190     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2191     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2192     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2193     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2194     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2195     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2196     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2197     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2198     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2199     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2200     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2201     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2202     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2203     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatConjugate_MPIAIJ"
2212 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2213 {
2214 #if defined(PETSC_USE_COMPLEX)
2215   PetscErrorCode ierr;
2216   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2217 
2218   PetscFunctionBegin;
2219   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2220   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2221 #else
2222   PetscFunctionBegin;
2223 #endif
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatRealPart_MPIAIJ"
2229 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2230 {
2231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2232   PetscErrorCode ierr;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2236   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2242 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2243 {
2244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2245   PetscErrorCode ierr;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2249   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #if defined(PETSC_HAVE_PBGL)
2254 
2255 #include <boost/parallel/mpi/bsp_process_group.hpp>
2256 #include <boost/graph/distributed/ilu_default_graph.hpp>
2257 #include <boost/graph/distributed/ilu_0_block.hpp>
2258 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2259 #include <boost/graph/distributed/petsc/interface.hpp>
2260 #include <boost/multi_array.hpp>
2261 #include <boost/parallel/distributed_property_map->hpp>
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2265 /*
2266   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2267 */
2268 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2269 {
2270   namespace petsc = boost::distributed::petsc;
2271 
2272   namespace graph_dist = boost::graph::distributed;
2273   using boost::graph::distributed::ilu_default::process_group_type;
2274   using boost::graph::ilu_permuted;
2275 
2276   PetscBool      row_identity, col_identity;
2277   PetscContainer c;
2278   PetscInt       m, n, M, N;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2283   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2284   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2285   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2286 
2287   process_group_type pg;
2288   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2289   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2290   lgraph_type& level_graph = *lgraph_p;
2291   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2292 
2293   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2294   ilu_permuted(level_graph);
2295 
2296   /* put together the new matrix */
2297   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2298   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2299   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2300   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2301   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2302   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2303   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2304   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305 
2306   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2307   ierr = PetscContainerSetPointer(c, lgraph_p);
2308   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2309   ierr = PetscContainerDestroy(&c);
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2315 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2316 {
2317   PetscFunctionBegin;
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatSolve_MPIAIJ"
2323 /*
2324   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2325 */
2326 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2327 {
2328   namespace graph_dist = boost::graph::distributed;
2329 
2330   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2331   lgraph_type    *lgraph_p;
2332   PetscContainer c;
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2337   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2338   ierr = VecCopy(b, x);CHKERRQ(ierr);
2339 
2340   PetscScalar *array_x;
2341   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2342   PetscInt sx;
2343   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2344 
2345   PetscScalar *array_b;
2346   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2347   PetscInt sb;
2348   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2349 
2350   lgraph_type& level_graph = *lgraph_p;
2351   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2352 
2353   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2354   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2355   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2356 
2357   typedef boost::iterator_property_map<array_ref_type::iterator,
2358                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2359   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2360   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2361 
2362   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2363   PetscFunctionReturn(0);
2364 }
2365 #endif
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2369 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2372   PetscErrorCode ierr;
2373   PetscInt       i,*idxb = 0;
2374   PetscScalar    *va,*vb;
2375   Vec            vtmp;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2379   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2380   if (idx) {
2381     for (i=0; i<A->rmap->n; i++) {
2382       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2383     }
2384   }
2385 
2386   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2387   if (idx) {
2388     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2389   }
2390   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2391   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2392 
2393   for (i=0; i<A->rmap->n; i++) {
2394     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2395       va[i] = vb[i];
2396       if (idx) idx[i] = a->garray[idxb[i]];
2397     }
2398   }
2399 
2400   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2401   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2402   ierr = PetscFree(idxb);CHKERRQ(ierr);
2403   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2409 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2410 {
2411   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2412   PetscErrorCode ierr;
2413   PetscInt       i,*idxb = 0;
2414   PetscScalar    *va,*vb;
2415   Vec            vtmp;
2416 
2417   PetscFunctionBegin;
2418   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2419   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2420   if (idx) {
2421     for (i=0; i<A->cmap->n; i++) {
2422       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2423     }
2424   }
2425 
2426   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2427   if (idx) {
2428     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2429   }
2430   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2431   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2432 
2433   for (i=0; i<A->rmap->n; i++) {
2434     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2435       va[i] = vb[i];
2436       if (idx) idx[i] = a->garray[idxb[i]];
2437     }
2438   }
2439 
2440   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2441   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2442   ierr = PetscFree(idxb);CHKERRQ(ierr);
2443   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2449 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2450 {
2451   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2452   PetscInt       n      = A->rmap->n;
2453   PetscInt       cstart = A->cmap->rstart;
2454   PetscInt       *cmap  = mat->garray;
2455   PetscInt       *diagIdx, *offdiagIdx;
2456   Vec            diagV, offdiagV;
2457   PetscScalar    *a, *diagA, *offdiagA;
2458   PetscInt       r;
2459   PetscErrorCode ierr;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2463   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2464   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2465   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2466   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2467   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2468   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2469   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2470   for (r = 0; r < n; ++r) {
2471     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2472       a[r]   = diagA[r];
2473       idx[r] = cstart + diagIdx[r];
2474     } else {
2475       a[r]   = offdiagA[r];
2476       idx[r] = cmap[offdiagIdx[r]];
2477     }
2478   }
2479   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2480   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2481   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2482   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2483   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2484   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2490 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2491 {
2492   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2493   PetscInt       n      = A->rmap->n;
2494   PetscInt       cstart = A->cmap->rstart;
2495   PetscInt       *cmap  = mat->garray;
2496   PetscInt       *diagIdx, *offdiagIdx;
2497   Vec            diagV, offdiagV;
2498   PetscScalar    *a, *diagA, *offdiagA;
2499   PetscInt       r;
2500   PetscErrorCode ierr;
2501 
2502   PetscFunctionBegin;
2503   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2504   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2505   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2506   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2507   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2508   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2509   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2510   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2511   for (r = 0; r < n; ++r) {
2512     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2513       a[r]   = diagA[r];
2514       idx[r] = cstart + diagIdx[r];
2515     } else {
2516       a[r]   = offdiagA[r];
2517       idx[r] = cmap[offdiagIdx[r]];
2518     }
2519   }
2520   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2521   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2522   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2523   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2524   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2525   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 #undef __FUNCT__
2530 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2531 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2532 {
2533   PetscErrorCode ierr;
2534   Mat            *dummy;
2535 
2536   PetscFunctionBegin;
2537   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2538   *newmat = *dummy;
2539   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2545 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2546 {
2547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2557 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2558 {
2559   PetscErrorCode ierr;
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2564   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2565   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2566   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 /* -------------------------------------------------------------------*/
2571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2572                                        MatGetRow_MPIAIJ,
2573                                        MatRestoreRow_MPIAIJ,
2574                                        MatMult_MPIAIJ,
2575                                 /* 4*/ MatMultAdd_MPIAIJ,
2576                                        MatMultTranspose_MPIAIJ,
2577                                        MatMultTransposeAdd_MPIAIJ,
2578 #if defined(PETSC_HAVE_PBGL)
2579                                        MatSolve_MPIAIJ,
2580 #else
2581                                        0,
2582 #endif
2583                                        0,
2584                                        0,
2585                                 /*10*/ 0,
2586                                        0,
2587                                        0,
2588                                        MatSOR_MPIAIJ,
2589                                        MatTranspose_MPIAIJ,
2590                                 /*15*/ MatGetInfo_MPIAIJ,
2591                                        MatEqual_MPIAIJ,
2592                                        MatGetDiagonal_MPIAIJ,
2593                                        MatDiagonalScale_MPIAIJ,
2594                                        MatNorm_MPIAIJ,
2595                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2596                                        MatAssemblyEnd_MPIAIJ,
2597                                        MatSetOption_MPIAIJ,
2598                                        MatZeroEntries_MPIAIJ,
2599                                 /*24*/ MatZeroRows_MPIAIJ,
2600                                        0,
2601 #if defined(PETSC_HAVE_PBGL)
2602                                        0,
2603 #else
2604                                        0,
2605 #endif
2606                                        0,
2607                                        0,
2608                                 /*29*/ MatSetUp_MPIAIJ,
2609 #if defined(PETSC_HAVE_PBGL)
2610                                        0,
2611 #else
2612                                        0,
2613 #endif
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*34*/ MatDuplicate_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*39*/ MatAXPY_MPIAIJ,
2623                                        MatGetSubMatrices_MPIAIJ,
2624                                        MatIncreaseOverlap_MPIAIJ,
2625                                        MatGetValues_MPIAIJ,
2626                                        MatCopy_MPIAIJ,
2627                                 /*44*/ MatGetRowMax_MPIAIJ,
2628                                        MatScale_MPIAIJ,
2629                                        0,
2630                                        MatDiagonalSet_MPIAIJ,
2631                                        MatZeroRowsColumns_MPIAIJ,
2632                                 /*49*/ MatSetRandom_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2638                                        0,
2639                                        MatSetUnfactored_MPIAIJ,
2640                                        MatPermute_MPIAIJ,
2641                                        0,
2642                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2643                                        MatDestroy_MPIAIJ,
2644                                        MatView_MPIAIJ,
2645                                        0,
2646                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2653                                        MatGetRowMinAbs_MPIAIJ,
2654                                        0,
2655                                        MatSetColoring_MPIAIJ,
2656                                        0,
2657                                        MatSetValuesAdifor_MPIAIJ,
2658                                 /*75*/ MatFDColoringApply_AIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        MatFindZeroDiagonals_MPIAIJ,
2663                                 /*80*/ 0,
2664                                        0,
2665                                        0,
2666                                 /*83*/ MatLoad_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2673                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2674                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2675                                        MatPtAP_MPIAIJ_MPIAIJ,
2676                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2677                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                 /*99*/ 0,
2683                                        0,
2684                                        0,
2685                                        MatConjugate_MPIAIJ,
2686                                        0,
2687                                 /*104*/MatSetValuesRow_MPIAIJ,
2688                                        MatRealPart_MPIAIJ,
2689                                        MatImaginaryPart_MPIAIJ,
2690                                        0,
2691                                        0,
2692                                 /*109*/0,
2693                                        0,
2694                                        MatGetRowMin_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                 /*119*/0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        MatGetMultiProcBlock_MPIAIJ,
2707                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2708                                        MatGetColumnNorms_MPIAIJ,
2709                                        MatInvertBlockDiagonal_MPIAIJ,
2710                                        0,
2711                                        MatGetSubMatricesParallel_MPIAIJ,
2712                                 /*129*/0,
2713                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2715                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                 /*134*/0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                 /*139*/0,
2723                                        0,
2724                                        0,
2725                                        MatFDColoringSetUp_MPIXAIJ,
2726                                        0,
2727                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2728 };
2729 
2730 /* ----------------------------------------------------------------------------------------*/
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2734 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2747 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2748 {
2749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2754   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2760 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2761 {
2762   Mat_MPIAIJ     *b;
2763   PetscErrorCode ierr;
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2768   b = (Mat_MPIAIJ*)B->data;
2769 
2770   if (!B->preallocated) {
2771     /* Explicitly create 2 MATSEQAIJ matrices. */
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2774     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2775     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2777     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2778     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2779     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2780     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2781     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2782   }
2783 
2784   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2785   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2786   B->preallocated = PETSC_TRUE;
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2793 {
2794   Mat            mat;
2795   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   *newmat = 0;
2800   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2801   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2802   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2803   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2804   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2805   a       = (Mat_MPIAIJ*)mat->data;
2806 
2807   mat->factortype   = matin->factortype;
2808   mat->assembled    = PETSC_TRUE;
2809   mat->insertmode   = NOT_SET_VALUES;
2810   mat->preallocated = PETSC_TRUE;
2811 
2812   a->size         = oldmat->size;
2813   a->rank         = oldmat->rank;
2814   a->donotstash   = oldmat->donotstash;
2815   a->roworiented  = oldmat->roworiented;
2816   a->rowindices   = 0;
2817   a->rowvalues    = 0;
2818   a->getrowactive = PETSC_FALSE;
2819 
2820   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2822 
2823   if (oldmat->colmap) {
2824 #if defined(PETSC_USE_CTABLE)
2825     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2826 #else
2827     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2829     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2830 #endif
2831   } else a->colmap = 0;
2832   if (oldmat->garray) {
2833     PetscInt len;
2834     len  = oldmat->B->cmap->n;
2835     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2837     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2838   } else a->garray = 0;
2839 
2840   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2841   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2842   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2843   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2844   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2845   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2846   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2847   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2848   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2849   *newmat = mat;
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatLoad_MPIAIJ"
2857 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2858 {
2859   PetscScalar    *vals,*svals;
2860   MPI_Comm       comm;
2861   PetscErrorCode ierr;
2862   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2863   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2864   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2865   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2866   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2867   int            fd;
2868   PetscInt       bs = 1;
2869 
2870   PetscFunctionBegin;
2871   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2874   if (!rank) {
2875     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2876     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2877     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2878   }
2879 
2880   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
2881   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2882   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2883 
2884   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M    = header[1]; N = header[2];
2888   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2889   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2890   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2891 
2892   /* If global sizes are set, check if they are consistent with that given in the file */
2893   if (sizesset) {
2894     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2895   }
2896   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2897   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2898 
2899   /* determine ownership of all (block) rows */
2900   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2901   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2902   else m = newMat->rmap->n; /* Set by user */
2903 
2904   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
2905   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2906 
2907   /* First process needs enough room for process with most rows */
2908   if (!rank) {
2909     mmax = rowners[1];
2910     for (i=2; i<=size; i++) {
2911       mmax = PetscMax(mmax, rowners[i]);
2912     }
2913   } else mmax = -1;             /* unused, but compilers complain */
2914 
2915   rowners[0] = 0;
2916   for (i=2; i<=size; i++) {
2917     rowners[i] += rowners[i-1];
2918   }
2919   rstart = rowners[rank];
2920   rend   = rowners[rank+1];
2921 
2922   /* distribute row lengths to all processors */
2923   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2924   if (!rank) {
2925     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2926     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2927     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2928     for (j=0; j<m; j++) {
2929       procsnz[0] += ourlens[j];
2930     }
2931     for (i=1; i<size; i++) {
2932       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2933       /* calculate the number of nonzeros on each processor */
2934       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2935         procsnz[i] += rowlengths[j];
2936       }
2937       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2938     }
2939     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2940   } else {
2941     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2942   }
2943 
2944   if (!rank) {
2945     /* determine max buffer needed and allocate it */
2946     maxnz = 0;
2947     for (i=0; i<size; i++) {
2948       maxnz = PetscMax(maxnz,procsnz[i]);
2949     }
2950     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2951 
2952     /* read in my part of the matrix column indices  */
2953     nz   = procsnz[0];
2954     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2955     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2956 
2957     /* read in every one elses and ship off */
2958     for (i=1; i<size; i++) {
2959       nz   = procsnz[i];
2960       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2961       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2962     }
2963     ierr = PetscFree(cols);CHKERRQ(ierr);
2964   } else {
2965     /* determine buffer space needed for message */
2966     nz = 0;
2967     for (i=0; i<m; i++) {
2968       nz += ourlens[i];
2969     }
2970     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2971 
2972     /* receive message of column indices*/
2973     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2974   }
2975 
2976   /* determine column ownership if matrix is not square */
2977   if (N != M) {
2978     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2979     else n = newMat->cmap->n;
2980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2981     cstart = cend - n;
2982   } else {
2983     cstart = rstart;
2984     cend   = rend;
2985     n      = cend - cstart;
2986   }
2987 
2988   /* loop over local rows, determining number of off diagonal entries */
2989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2990   jj   = 0;
2991   for (i=0; i<m; i++) {
2992     for (j=0; j<ourlens[i]; j++) {
2993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2994       jj++;
2995     }
2996   }
2997 
2998   for (i=0; i<m; i++) {
2999     ourlens[i] -= offlens[i];
3000   }
3001   if (!sizesset) {
3002     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3003   }
3004 
3005   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3006 
3007   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3008 
3009   for (i=0; i<m; i++) {
3010     ourlens[i] += offlens[i];
3011   }
3012 
3013   if (!rank) {
3014     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3015 
3016     /* read in my part of the matrix numerical values  */
3017     nz   = procsnz[0];
3018     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3019 
3020     /* insert into matrix */
3021     jj      = rstart;
3022     smycols = mycols;
3023     svals   = vals;
3024     for (i=0; i<m; i++) {
3025       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3026       smycols += ourlens[i];
3027       svals   += ourlens[i];
3028       jj++;
3029     }
3030 
3031     /* read in other processors and ship out */
3032     for (i=1; i<size; i++) {
3033       nz   = procsnz[i];
3034       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3035       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3036     }
3037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3038   } else {
3039     /* receive numeric values */
3040     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3041 
3042     /* receive message of values*/
3043     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3044 
3045     /* insert into matrix */
3046     jj      = rstart;
3047     smycols = mycols;
3048     svals   = vals;
3049     for (i=0; i<m; i++) {
3050       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3051       smycols += ourlens[i];
3052       svals   += ourlens[i];
3053       jj++;
3054     }
3055   }
3056   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3057   ierr = PetscFree(vals);CHKERRQ(ierr);
3058   ierr = PetscFree(mycols);CHKERRQ(ierr);
3059   ierr = PetscFree(rowners);CHKERRQ(ierr);
3060   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3067 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3068 {
3069   PetscErrorCode ierr;
3070   IS             iscol_local;
3071   PetscInt       csize;
3072 
3073   PetscFunctionBegin;
3074   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3075   if (call == MAT_REUSE_MATRIX) {
3076     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3077     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3078   } else {
3079     PetscInt cbs;
3080     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3081     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3082     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3083   }
3084   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3085   if (call == MAT_INITIAL_MATRIX) {
3086     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3087     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3088   }
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3095 /*
3096     Not great since it makes two copies of the submatrix, first an SeqAIJ
3097   in local and then by concatenating the local matrices the end result.
3098   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3099 
3100   Note: This requires a sequential iscol with all indices.
3101 */
3102 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3103 {
3104   PetscErrorCode ierr;
3105   PetscMPIInt    rank,size;
3106   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3107   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3108   PetscBool      allcolumns, colflag;
3109   Mat            M,Mreuse;
3110   MatScalar      *vwork,*aa;
3111   MPI_Comm       comm;
3112   Mat_SeqAIJ     *aij;
3113 
3114   PetscFunctionBegin;
3115   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3118 
3119   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3120   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3121   if (colflag && ncol == mat->cmap->N) {
3122     allcolumns = PETSC_TRUE;
3123   } else {
3124     allcolumns = PETSC_FALSE;
3125   }
3126   if (call ==  MAT_REUSE_MATRIX) {
3127     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3128     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3129     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3130   } else {
3131     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3132   }
3133 
3134   /*
3135       m - number of local rows
3136       n - number of columns (same on all processors)
3137       rstart - first row in new global matrix generated
3138   */
3139   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3140   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3141   if (call == MAT_INITIAL_MATRIX) {
3142     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3143     ii  = aij->i;
3144     jj  = aij->j;
3145 
3146     /*
3147         Determine the number of non-zeros in the diagonal and off-diagonal
3148         portions of the matrix in order to do correct preallocation
3149     */
3150 
3151     /* first get start and end of "diagonal" columns */
3152     if (csize == PETSC_DECIDE) {
3153       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3154       if (mglobal == n) { /* square matrix */
3155         nlocal = m;
3156       } else {
3157         nlocal = n/size + ((n % size) > rank);
3158       }
3159     } else {
3160       nlocal = csize;
3161     }
3162     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3163     rstart = rend - nlocal;
3164     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3165 
3166     /* next, compute all the lengths */
3167     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3168     olens = dlens + m;
3169     for (i=0; i<m; i++) {
3170       jend = ii[i+1] - ii[i];
3171       olen = 0;
3172       dlen = 0;
3173       for (j=0; j<jend; j++) {
3174         if (*jj < rstart || *jj >= rend) olen++;
3175         else dlen++;
3176         jj++;
3177       }
3178       olens[i] = olen;
3179       dlens[i] = dlen;
3180     }
3181     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3182     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3183     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3184     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3185     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3186     ierr = PetscFree(dlens);CHKERRQ(ierr);
3187   } else {
3188     PetscInt ml,nl;
3189 
3190     M    = *newmat;
3191     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3192     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3193     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3194     /*
3195          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3196        rather than the slower MatSetValues().
3197     */
3198     M->was_assembled = PETSC_TRUE;
3199     M->assembled     = PETSC_FALSE;
3200   }
3201   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3202   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3203   ii   = aij->i;
3204   jj   = aij->j;
3205   aa   = aij->a;
3206   for (i=0; i<m; i++) {
3207     row   = rstart + i;
3208     nz    = ii[i+1] - ii[i];
3209     cwork = jj;     jj += nz;
3210     vwork = aa;     aa += nz;
3211     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3212   }
3213 
3214   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3215   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   *newmat = M;
3217 
3218   /* save submatrix used in processor for next request */
3219   if (call ==  MAT_INITIAL_MATRIX) {
3220     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3221     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3222   }
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3228 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3229 {
3230   PetscInt       m,cstart, cend,j,nnz,i,d;
3231   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3232   const PetscInt *JJ;
3233   PetscScalar    *values;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3238 
3239   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3240   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3241   m      = B->rmap->n;
3242   cstart = B->cmap->rstart;
3243   cend   = B->cmap->rend;
3244   rstart = B->rmap->rstart;
3245 
3246   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3247 
3248 #if defined(PETSC_USE_DEBUGGING)
3249   for (i=0; i<m; i++) {
3250     nnz = Ii[i+1]- Ii[i];
3251     JJ  = J + Ii[i];
3252     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3253     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3254     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3255   }
3256 #endif
3257 
3258   for (i=0; i<m; i++) {
3259     nnz     = Ii[i+1]- Ii[i];
3260     JJ      = J + Ii[i];
3261     nnz_max = PetscMax(nnz_max,nnz);
3262     d       = 0;
3263     for (j=0; j<nnz; j++) {
3264       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3265     }
3266     d_nnz[i] = d;
3267     o_nnz[i] = nnz - d;
3268   }
3269   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3270   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3271 
3272   if (v) values = (PetscScalar*)v;
3273   else {
3274     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3275   }
3276 
3277   for (i=0; i<m; i++) {
3278     ii   = i + rstart;
3279     nnz  = Ii[i+1]- Ii[i];
3280     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3281   }
3282   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284 
3285   if (!v) {
3286     ierr = PetscFree(values);CHKERRQ(ierr);
3287   }
3288   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3294 /*@
3295    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3296    (the default parallel PETSc format).
3297 
3298    Collective on MPI_Comm
3299 
3300    Input Parameters:
3301 +  B - the matrix
3302 .  i - the indices into j for the start of each local row (starts with zero)
3303 .  j - the column indices for each local row (starts with zero)
3304 -  v - optional values in the matrix
3305 
3306    Level: developer
3307 
3308    Notes:
3309        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3310      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3311      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3312 
3313        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3314 
3315        The format which is used for the sparse matrix input, is equivalent to a
3316     row-major ordering.. i.e for the following matrix, the input data expected is
3317     as shown:
3318 
3319         1 0 0
3320         2 0 3     P0
3321        -------
3322         4 5 6     P1
3323 
3324      Process0 [P0]: rows_owned=[0,1]
3325         i =  {0,1,3}  [size = nrow+1  = 2+1]
3326         j =  {0,0,2}  [size = nz = 6]
3327         v =  {1,2,3}  [size = nz = 6]
3328 
3329      Process1 [P1]: rows_owned=[2]
3330         i =  {0,3}    [size = nrow+1  = 1+1]
3331         j =  {0,1,2}  [size = nz = 6]
3332         v =  {4,5,6}  [size = nz = 6]
3333 
3334 .keywords: matrix, aij, compressed row, sparse, parallel
3335 
3336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3337           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3338 @*/
3339 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3340 {
3341   PetscErrorCode ierr;
3342 
3343   PetscFunctionBegin;
3344   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3345   PetscFunctionReturn(0);
3346 }
3347 
3348 #undef __FUNCT__
3349 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3350 /*@C
3351    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3352    (the default parallel PETSc format).  For good matrix assembly performance
3353    the user should preallocate the matrix storage by setting the parameters
3354    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3355    performance can be increased by more than a factor of 50.
3356 
3357    Collective on MPI_Comm
3358 
3359    Input Parameters:
3360 +  B - the matrix
3361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3362            (same value is used for all local rows)
3363 .  d_nnz - array containing the number of nonzeros in the various rows of the
3364            DIAGONAL portion of the local submatrix (possibly different for each row)
3365            or NULL, if d_nz is used to specify the nonzero structure.
3366            The size of this array is equal to the number of local rows, i.e 'm'.
3367            For matrices that will be factored, you must leave room for (and set)
3368            the diagonal entry even if it is zero.
3369 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3370            submatrix (same value is used for all local rows).
3371 -  o_nnz - array containing the number of nonzeros in the various rows of the
3372            OFF-DIAGONAL portion of the local submatrix (possibly different for
3373            each row) or NULL, if o_nz is used to specify the nonzero
3374            structure. The size of this array is equal to the number
3375            of local rows, i.e 'm'.
3376 
3377    If the *_nnz parameter is given then the *_nz parameter is ignored
3378 
3379    The AIJ format (also called the Yale sparse matrix format or
3380    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3381    storage.  The stored row and column indices begin with zero.
3382    See Users-Manual: ch_mat for details.
3383 
3384    The parallel matrix is partitioned such that the first m0 rows belong to
3385    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3386    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3387 
3388    The DIAGONAL portion of the local submatrix of a processor can be defined
3389    as the submatrix which is obtained by extraction the part corresponding to
3390    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3391    first row that belongs to the processor, r2 is the last row belonging to
3392    the this processor, and c1-c2 is range of indices of the local part of a
3393    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3394    common case of a square matrix, the row and column ranges are the same and
3395    the DIAGONAL part is also square. The remaining portion of the local
3396    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3397 
3398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Example usage:
3406 
3407    Consider the following 8x8 matrix with 34 non-zero values, that is
3408    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3410    as follows:
3411 
3412 .vb
3413             1  2  0  |  0  3  0  |  0  4
3414     Proc0   0  5  6  |  7  0  0  |  8  0
3415             9  0 10  | 11  0  0  | 12  0
3416     -------------------------------------
3417            13  0 14  | 15 16 17  |  0  0
3418     Proc1   0 18  0  | 19 20 21  |  0  0
3419             0  0  0  | 22 23  0  | 24  0
3420     -------------------------------------
3421     Proc2  25 26 27  |  0  0 28  | 29  0
3422            30  0  0  | 31 32 33  |  0 34
3423 .ve
3424 
3425    This can be represented as a collection of submatrices as:
3426 
3427 .vb
3428       A B C
3429       D E F
3430       G H I
3431 .ve
3432 
3433    Where the submatrices A,B,C are owned by proc0, D,E,F are
3434    owned by proc1, G,H,I are owned by proc2.
3435 
3436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'M','N' parameters are 8,8, and have the same values on all procs.
3439 
3440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3444    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3445    matrix, ans [DF] as another SeqAIJ matrix.
3446 
3447    When d_nz, o_nz parameters are specified, d_nz storage elements are
3448    allocated for every row of the local diagonal submatrix, and o_nz
3449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3450    One way to choose d_nz and o_nz is to use the max nonzerors per local
3451    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3452    In this case, the values of d_nz,o_nz are:
3453 .vb
3454      proc0 : dnz = 2, o_nz = 2
3455      proc1 : dnz = 3, o_nz = 2
3456      proc2 : dnz = 1, o_nz = 4
3457 .ve
3458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3460    for proc3. i.e we are using 12+15+10=37 storage locations to store
3461    34 values.
3462 
3463    When d_nnz, o_nnz parameters are specified, the storage is specified
3464    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3465    In the above case the values for d_nnz,o_nnz are:
3466 .vb
3467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3470 .ve
3471    Here the space allocated is sum of all the above values i.e 34, and
3472    hence pre-allocation is perfect.
3473 
3474    Level: intermediate
3475 
3476 .keywords: matrix, aij, compressed row, sparse, parallel
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3479           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3480 @*/
3481 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3482 {
3483   PetscErrorCode ierr;
3484 
3485   PetscFunctionBegin;
3486   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3487   PetscValidType(B,1);
3488   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3494 /*@
3495      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3496          CSR format the local rows.
3497 
3498    Collective on MPI_Comm
3499 
3500    Input Parameters:
3501 +  comm - MPI communicator
3502 .  m - number of local rows (Cannot be PETSC_DECIDE)
3503 .  n - This value should be the same as the local size used in creating the
3504        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3505        calculated if N is given) For square matrices n is almost always m.
3506 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3507 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3508 .   i - row indices
3509 .   j - column indices
3510 -   a - matrix values
3511 
3512    Output Parameter:
3513 .   mat - the matrix
3514 
3515    Level: intermediate
3516 
3517    Notes:
3518        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3519      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3520      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3521 
3522        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3523 
3524        The format which is used for the sparse matrix input, is equivalent to a
3525     row-major ordering.. i.e for the following matrix, the input data expected is
3526     as shown:
3527 
3528         1 0 0
3529         2 0 3     P0
3530        -------
3531         4 5 6     P1
3532 
3533      Process0 [P0]: rows_owned=[0,1]
3534         i =  {0,1,3}  [size = nrow+1  = 2+1]
3535         j =  {0,0,2}  [size = nz = 6]
3536         v =  {1,2,3}  [size = nz = 6]
3537 
3538      Process1 [P1]: rows_owned=[2]
3539         i =  {0,3}    [size = nrow+1  = 1+1]
3540         j =  {0,1,2}  [size = nz = 6]
3541         v =  {4,5,6}  [size = nz = 6]
3542 
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544 
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3546           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3547 @*/
3548 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3549 {
3550   PetscErrorCode ierr;
3551 
3552   PetscFunctionBegin;
3553   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3554   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3555   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3556   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3557   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3558   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3559   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatCreateAIJ"
3565 /*@C
3566    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3567    (the default parallel PETSc format).  For good matrix assembly performance
3568    the user should preallocate the matrix storage by setting the parameters
3569    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3570    performance can be increased by more than a factor of 50.
3571 
3572    Collective on MPI_Comm
3573 
3574    Input Parameters:
3575 +  comm - MPI communicator
3576 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3577            This value should be the same as the local size used in creating the
3578            y vector for the matrix-vector product y = Ax.
3579 .  n - This value should be the same as the local size used in creating the
3580        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3581        calculated if N is given) For square matrices n is almost always m.
3582 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3583 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3584 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3585            (same value is used for all local rows)
3586 .  d_nnz - array containing the number of nonzeros in the various rows of the
3587            DIAGONAL portion of the local submatrix (possibly different for each row)
3588            or NULL, if d_nz is used to specify the nonzero structure.
3589            The size of this array is equal to the number of local rows, i.e 'm'.
3590 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3591            submatrix (same value is used for all local rows).
3592 -  o_nnz - array containing the number of nonzeros in the various rows of the
3593            OFF-DIAGONAL portion of the local submatrix (possibly different for
3594            each row) or NULL, if o_nz is used to specify the nonzero
3595            structure. The size of this array is equal to the number
3596            of local rows, i.e 'm'.
3597 
3598    Output Parameter:
3599 .  A - the matrix
3600 
3601    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3602    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3603    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3604 
3605    Notes:
3606    If the *_nnz parameter is given then the *_nz parameter is ignored
3607 
3608    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3609    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3610    storage requirements for this matrix.
3611 
3612    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3613    processor than it must be used on all processors that share the object for
3614    that argument.
3615 
3616    The user MUST specify either the local or global matrix dimensions
3617    (possibly both).
3618 
3619    The parallel matrix is partitioned across processors such that the
3620    first m0 rows belong to process 0, the next m1 rows belong to
3621    process 1, the next m2 rows belong to process 2 etc.. where
3622    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3623    values corresponding to [m x N] submatrix.
3624 
3625    The columns are logically partitioned with the n0 columns belonging
3626    to 0th partition, the next n1 columns belonging to the next
3627    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3628 
3629    The DIAGONAL portion of the local submatrix on any given processor
3630    is the submatrix corresponding to the rows and columns m,n
3631    corresponding to the given processor. i.e diagonal matrix on
3632    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3633    etc. The remaining portion of the local submatrix [m x (N-n)]
3634    constitute the OFF-DIAGONAL portion. The example below better
3635    illustrates this concept.
3636 
3637    For a square global matrix we define each processor's diagonal portion
3638    to be its local rows and the corresponding columns (a square submatrix);
3639    each processor's off-diagonal portion encompasses the remainder of the
3640    local matrix (a rectangular submatrix).
3641 
3642    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3643 
3644    When calling this routine with a single process communicator, a matrix of
3645    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3646    type of communicator, use the construction mechanism:
3647      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3648 
3649    By default, this format uses inodes (identical nodes) when possible.
3650    We search for consecutive rows with the same nonzero structure, thereby
3651    reusing matrix information to achieve increased efficiency.
3652 
3653    Options Database Keys:
3654 +  -mat_no_inode  - Do not use inodes
3655 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3656 -  -mat_aij_oneindex - Internally use indexing starting at 1
3657         rather than 0.  Note that when calling MatSetValues(),
3658         the user still MUST index entries starting at 0!
3659 
3660 
3661    Example usage:
3662 
3663    Consider the following 8x8 matrix with 34 non-zero values, that is
3664    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3665    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3666    as follows:
3667 
3668 .vb
3669             1  2  0  |  0  3  0  |  0  4
3670     Proc0   0  5  6  |  7  0  0  |  8  0
3671             9  0 10  | 11  0  0  | 12  0
3672     -------------------------------------
3673            13  0 14  | 15 16 17  |  0  0
3674     Proc1   0 18  0  | 19 20 21  |  0  0
3675             0  0  0  | 22 23  0  | 24  0
3676     -------------------------------------
3677     Proc2  25 26 27  |  0  0 28  | 29  0
3678            30  0  0  | 31 32 33  |  0 34
3679 .ve
3680 
3681    This can be represented as a collection of submatrices as:
3682 
3683 .vb
3684       A B C
3685       D E F
3686       G H I
3687 .ve
3688 
3689    Where the submatrices A,B,C are owned by proc0, D,E,F are
3690    owned by proc1, G,H,I are owned by proc2.
3691 
3692    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3693    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3694    The 'M','N' parameters are 8,8, and have the same values on all procs.
3695 
3696    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3697    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3698    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3699    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3700    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3701    matrix, ans [DF] as another SeqAIJ matrix.
3702 
3703    When d_nz, o_nz parameters are specified, d_nz storage elements are
3704    allocated for every row of the local diagonal submatrix, and o_nz
3705    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3706    One way to choose d_nz and o_nz is to use the max nonzerors per local
3707    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3708    In this case, the values of d_nz,o_nz are:
3709 .vb
3710      proc0 : dnz = 2, o_nz = 2
3711      proc1 : dnz = 3, o_nz = 2
3712      proc2 : dnz = 1, o_nz = 4
3713 .ve
3714    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3715    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3716    for proc3. i.e we are using 12+15+10=37 storage locations to store
3717    34 values.
3718 
3719    When d_nnz, o_nnz parameters are specified, the storage is specified
3720    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3721    In the above case the values for d_nnz,o_nnz are:
3722 .vb
3723      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3724      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3725      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3726 .ve
3727    Here the space allocated is sum of all the above values i.e 34, and
3728    hence pre-allocation is perfect.
3729 
3730    Level: intermediate
3731 
3732 .keywords: matrix, aij, compressed row, sparse, parallel
3733 
3734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3735           MPIAIJ, MatCreateMPIAIJWithArrays()
3736 @*/
3737 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3738 {
3739   PetscErrorCode ierr;
3740   PetscMPIInt    size;
3741 
3742   PetscFunctionBegin;
3743   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3744   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3745   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3746   if (size > 1) {
3747     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3748     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3749   } else {
3750     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3751     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3752   }
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3758 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3759 {
3760   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3761 
3762   PetscFunctionBegin;
3763   if (Ad)     *Ad     = a->A;
3764   if (Ao)     *Ao     = a->B;
3765   if (colmap) *colmap = a->garray;
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3771 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3772 {
3773   PetscErrorCode ierr;
3774   PetscInt       i;
3775   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3776 
3777   PetscFunctionBegin;
3778   if (coloring->ctype == IS_COLORING_GLOBAL) {
3779     ISColoringValue *allcolors,*colors;
3780     ISColoring      ocoloring;
3781 
3782     /* set coloring for diagonal portion */
3783     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3784 
3785     /* set coloring for off-diagonal portion */
3786     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3787     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3788     for (i=0; i<a->B->cmap->n; i++) {
3789       colors[i] = allcolors[a->garray[i]];
3790     }
3791     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3792     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3793     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3794     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3795   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3796     ISColoringValue *colors;
3797     PetscInt        *larray;
3798     ISColoring      ocoloring;
3799 
3800     /* set coloring for diagonal portion */
3801     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3802     for (i=0; i<a->A->cmap->n; i++) {
3803       larray[i] = i + A->cmap->rstart;
3804     }
3805     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3806     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3807     for (i=0; i<a->A->cmap->n; i++) {
3808       colors[i] = coloring->colors[larray[i]];
3809     }
3810     ierr = PetscFree(larray);CHKERRQ(ierr);
3811     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3812     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3813     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3814 
3815     /* set coloring for off-diagonal portion */
3816     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3817     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3818     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3819     for (i=0; i<a->B->cmap->n; i++) {
3820       colors[i] = coloring->colors[larray[i]];
3821     }
3822     ierr = PetscFree(larray);CHKERRQ(ierr);
3823     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3824     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3825     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3826   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3827   PetscFunctionReturn(0);
3828 }
3829 
3830 #undef __FUNCT__
3831 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3832 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3833 {
3834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3835   PetscErrorCode ierr;
3836 
3837   PetscFunctionBegin;
3838   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3839   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842 
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3845 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3846 {
3847   PetscErrorCode ierr;
3848   PetscInt       m,N,i,rstart,nnz,Ii;
3849   PetscInt       *indx;
3850   PetscScalar    *values;
3851 
3852   PetscFunctionBegin;
3853   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3854   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3855     PetscInt       *dnz,*onz,sum,bs,cbs;
3856 
3857     if (n == PETSC_DECIDE) {
3858       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3859     }
3860     /* Check sum(n) = N */
3861     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3862     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3863 
3864     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3865     rstart -= m;
3866 
3867     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3868     for (i=0; i<m; i++) {
3869       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3870       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3871       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3872     }
3873 
3874     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3875     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3876     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3877     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3878     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3879     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3880     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3881   }
3882 
3883   /* numeric phase */
3884   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3885   for (i=0; i<m; i++) {
3886     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3887     Ii   = i + rstart;
3888     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3889     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3890   }
3891   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatFileSplit"
3898 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3899 {
3900   PetscErrorCode    ierr;
3901   PetscMPIInt       rank;
3902   PetscInt          m,N,i,rstart,nnz;
3903   size_t            len;
3904   const PetscInt    *indx;
3905   PetscViewer       out;
3906   char              *name;
3907   Mat               B;
3908   const PetscScalar *values;
3909 
3910   PetscFunctionBegin;
3911   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3912   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3913   /* Should this be the type of the diagonal block of A? */
3914   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3915   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3916   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3917   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3918   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3919   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3920   for (i=0; i<m; i++) {
3921     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3922     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3923     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3924   }
3925   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3926   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3927 
3928   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3929   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3930   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
3931   sprintf(name,"%s.%d",outfile,rank);
3932   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3933   ierr = PetscFree(name);CHKERRQ(ierr);
3934   ierr = MatView(B,out);CHKERRQ(ierr);
3935   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3936   ierr = MatDestroy(&B);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3941 #undef __FUNCT__
3942 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3943 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3944 {
3945   PetscErrorCode      ierr;
3946   Mat_Merge_SeqsToMPI *merge;
3947   PetscContainer      container;
3948 
3949   PetscFunctionBegin;
3950   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3951   if (container) {
3952     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3953     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3954     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3955     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3956     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3957     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3958     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3959     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3960     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3962     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3963     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3964     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3965     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3966     ierr = PetscFree(merge);CHKERRQ(ierr);
3967     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3968   }
3969   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #include <../src/mat/utils/freespace.h>
3974 #include <petscbt.h>
3975 
3976 #undef __FUNCT__
3977 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3978 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3979 {
3980   PetscErrorCode      ierr;
3981   MPI_Comm            comm;
3982   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3983   PetscMPIInt         size,rank,taga,*len_s;
3984   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3985   PetscInt            proc,m;
3986   PetscInt            **buf_ri,**buf_rj;
3987   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3988   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3989   MPI_Request         *s_waits,*r_waits;
3990   MPI_Status          *status;
3991   MatScalar           *aa=a->a;
3992   MatScalar           **abuf_r,*ba_i;
3993   Mat_Merge_SeqsToMPI *merge;
3994   PetscContainer      container;
3995 
3996   PetscFunctionBegin;
3997   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3998   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3999 
4000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4001   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4002 
4003   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4004   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4005 
4006   bi     = merge->bi;
4007   bj     = merge->bj;
4008   buf_ri = merge->buf_ri;
4009   buf_rj = merge->buf_rj;
4010 
4011   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4012   owners = merge->rowmap->range;
4013   len_s  = merge->len_s;
4014 
4015   /* send and recv matrix values */
4016   /*-----------------------------*/
4017   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4018   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4019 
4020   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4021   for (proc=0,k=0; proc<size; proc++) {
4022     if (!len_s[proc]) continue;
4023     i    = owners[proc];
4024     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4025     k++;
4026   }
4027 
4028   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4029   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4030   ierr = PetscFree(status);CHKERRQ(ierr);
4031 
4032   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4033   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4034 
4035   /* insert mat values of mpimat */
4036   /*----------------------------*/
4037   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4038   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4039 
4040   for (k=0; k<merge->nrecv; k++) {
4041     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4042     nrows       = *(buf_ri_k[k]);
4043     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4044     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4045   }
4046 
4047   /* set values of ba */
4048   m = merge->rowmap->n;
4049   for (i=0; i<m; i++) {
4050     arow = owners[rank] + i;
4051     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4052     bnzi = bi[i+1] - bi[i];
4053     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4054 
4055     /* add local non-zero vals of this proc's seqmat into ba */
4056     anzi   = ai[arow+1] - ai[arow];
4057     aj     = a->j + ai[arow];
4058     aa     = a->a + ai[arow];
4059     nextaj = 0;
4060     for (j=0; nextaj<anzi; j++) {
4061       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4062         ba_i[j] += aa[nextaj++];
4063       }
4064     }
4065 
4066     /* add received vals into ba */
4067     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4068       /* i-th row */
4069       if (i == *nextrow[k]) {
4070         anzi   = *(nextai[k]+1) - *nextai[k];
4071         aj     = buf_rj[k] + *(nextai[k]);
4072         aa     = abuf_r[k] + *(nextai[k]);
4073         nextaj = 0;
4074         for (j=0; nextaj<anzi; j++) {
4075           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4076             ba_i[j] += aa[nextaj++];
4077           }
4078         }
4079         nextrow[k]++; nextai[k]++;
4080       }
4081     }
4082     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4083   }
4084   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4085   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4086 
4087   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4088   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4089   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4090   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4091   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4092   PetscFunctionReturn(0);
4093 }
4094 
4095 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4096 
4097 #undef __FUNCT__
4098 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4099 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4100 {
4101   PetscErrorCode      ierr;
4102   Mat                 B_mpi;
4103   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4104   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4105   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4106   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4107   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4108   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4109   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4110   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4111   MPI_Status          *status;
4112   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4113   PetscBT             lnkbt;
4114   Mat_Merge_SeqsToMPI *merge;
4115   PetscContainer      container;
4116 
4117   PetscFunctionBegin;
4118   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4119 
4120   /* make sure it is a PETSc comm */
4121   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4122   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4124 
4125   ierr = PetscNew(&merge);CHKERRQ(ierr);
4126   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4127 
4128   /* determine row ownership */
4129   /*---------------------------------------------------------*/
4130   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4131   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4132   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4133   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4134   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4135   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4136   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4137 
4138   m      = merge->rowmap->n;
4139   owners = merge->rowmap->range;
4140 
4141   /* determine the number of messages to send, their lengths */
4142   /*---------------------------------------------------------*/
4143   len_s = merge->len_s;
4144 
4145   len          = 0; /* length of buf_si[] */
4146   merge->nsend = 0;
4147   for (proc=0; proc<size; proc++) {
4148     len_si[proc] = 0;
4149     if (proc == rank) {
4150       len_s[proc] = 0;
4151     } else {
4152       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4153       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4154     }
4155     if (len_s[proc]) {
4156       merge->nsend++;
4157       nrows = 0;
4158       for (i=owners[proc]; i<owners[proc+1]; i++) {
4159         if (ai[i+1] > ai[i]) nrows++;
4160       }
4161       len_si[proc] = 2*(nrows+1);
4162       len         += len_si[proc];
4163     }
4164   }
4165 
4166   /* determine the number and length of messages to receive for ij-structure */
4167   /*-------------------------------------------------------------------------*/
4168   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4169   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4170 
4171   /* post the Irecv of j-structure */
4172   /*-------------------------------*/
4173   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4174   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4175 
4176   /* post the Isend of j-structure */
4177   /*--------------------------------*/
4178   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4179 
4180   for (proc=0, k=0; proc<size; proc++) {
4181     if (!len_s[proc]) continue;
4182     i    = owners[proc];
4183     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4184     k++;
4185   }
4186 
4187   /* receives and sends of j-structure are complete */
4188   /*------------------------------------------------*/
4189   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4190   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4191 
4192   /* send and recv i-structure */
4193   /*---------------------------*/
4194   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4195   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4196 
4197   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4198   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4199   for (proc=0,k=0; proc<size; proc++) {
4200     if (!len_s[proc]) continue;
4201     /* form outgoing message for i-structure:
4202          buf_si[0]:                 nrows to be sent
4203                [1:nrows]:           row index (global)
4204                [nrows+1:2*nrows+1]: i-structure index
4205     */
4206     /*-------------------------------------------*/
4207     nrows       = len_si[proc]/2 - 1;
4208     buf_si_i    = buf_si + nrows+1;
4209     buf_si[0]   = nrows;
4210     buf_si_i[0] = 0;
4211     nrows       = 0;
4212     for (i=owners[proc]; i<owners[proc+1]; i++) {
4213       anzi = ai[i+1] - ai[i];
4214       if (anzi) {
4215         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4216         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4217         nrows++;
4218       }
4219     }
4220     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4221     k++;
4222     buf_si += len_si[proc];
4223   }
4224 
4225   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4226   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4227 
4228   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4229   for (i=0; i<merge->nrecv; i++) {
4230     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4231   }
4232 
4233   ierr = PetscFree(len_si);CHKERRQ(ierr);
4234   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4235   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4236   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4237   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4238   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4239   ierr = PetscFree(status);CHKERRQ(ierr);
4240 
4241   /* compute a local seq matrix in each processor */
4242   /*----------------------------------------------*/
4243   /* allocate bi array and free space for accumulating nonzero column info */
4244   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4245   bi[0] = 0;
4246 
4247   /* create and initialize a linked list */
4248   nlnk = N+1;
4249   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4250 
4251   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4252   len  = ai[owners[rank+1]] - ai[owners[rank]];
4253   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4254 
4255   current_space = free_space;
4256 
4257   /* determine symbolic info for each local row */
4258   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4259 
4260   for (k=0; k<merge->nrecv; k++) {
4261     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4262     nrows       = *buf_ri_k[k];
4263     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4264     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4265   }
4266 
4267   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4268   len  = 0;
4269   for (i=0; i<m; i++) {
4270     bnzi = 0;
4271     /* add local non-zero cols of this proc's seqmat into lnk */
4272     arow  = owners[rank] + i;
4273     anzi  = ai[arow+1] - ai[arow];
4274     aj    = a->j + ai[arow];
4275     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4276     bnzi += nlnk;
4277     /* add received col data into lnk */
4278     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4279       if (i == *nextrow[k]) { /* i-th row */
4280         anzi  = *(nextai[k]+1) - *nextai[k];
4281         aj    = buf_rj[k] + *nextai[k];
4282         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4283         bnzi += nlnk;
4284         nextrow[k]++; nextai[k]++;
4285       }
4286     }
4287     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4288 
4289     /* if free space is not available, make more free space */
4290     if (current_space->local_remaining<bnzi) {
4291       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4292       nspacedouble++;
4293     }
4294     /* copy data into free space, then initialize lnk */
4295     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4296     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4297 
4298     current_space->array           += bnzi;
4299     current_space->local_used      += bnzi;
4300     current_space->local_remaining -= bnzi;
4301 
4302     bi[i+1] = bi[i] + bnzi;
4303   }
4304 
4305   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4306 
4307   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4308   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4309   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4310 
4311   /* create symbolic parallel matrix B_mpi */
4312   /*---------------------------------------*/
4313   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4314   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4315   if (n==PETSC_DECIDE) {
4316     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4317   } else {
4318     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4319   }
4320   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4321   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4322   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4323   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4324   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4325 
4326   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4327   B_mpi->assembled    = PETSC_FALSE;
4328   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4329   merge->bi           = bi;
4330   merge->bj           = bj;
4331   merge->buf_ri       = buf_ri;
4332   merge->buf_rj       = buf_rj;
4333   merge->coi          = NULL;
4334   merge->coj          = NULL;
4335   merge->owners_co    = NULL;
4336 
4337   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4338 
4339   /* attach the supporting struct to B_mpi for reuse */
4340   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4341   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4342   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4343   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4344   *mpimat = B_mpi;
4345 
4346   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4347   PetscFunctionReturn(0);
4348 }
4349 
4350 #undef __FUNCT__
4351 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4352 /*@C
4353       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4354                  matrices from each processor
4355 
4356     Collective on MPI_Comm
4357 
4358    Input Parameters:
4359 +    comm - the communicators the parallel matrix will live on
4360 .    seqmat - the input sequential matrices
4361 .    m - number of local rows (or PETSC_DECIDE)
4362 .    n - number of local columns (or PETSC_DECIDE)
4363 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4364 
4365    Output Parameter:
4366 .    mpimat - the parallel matrix generated
4367 
4368     Level: advanced
4369 
4370    Notes:
4371      The dimensions of the sequential matrix in each processor MUST be the same.
4372      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4373      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4374 @*/
4375 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4376 {
4377   PetscErrorCode ierr;
4378   PetscMPIInt    size;
4379 
4380   PetscFunctionBegin;
4381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4382   if (size == 1) {
4383     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4384     if (scall == MAT_INITIAL_MATRIX) {
4385       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4386     } else {
4387       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4388     }
4389     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4390     PetscFunctionReturn(0);
4391   }
4392   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4393   if (scall == MAT_INITIAL_MATRIX) {
4394     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4395   }
4396   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4397   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 #undef __FUNCT__
4402 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4403 /*@
4404      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4405           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4406           with MatGetSize()
4407 
4408     Not Collective
4409 
4410    Input Parameters:
4411 +    A - the matrix
4412 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4413 
4414    Output Parameter:
4415 .    A_loc - the local sequential matrix generated
4416 
4417     Level: developer
4418 
4419 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4420 
4421 @*/
4422 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4423 {
4424   PetscErrorCode ierr;
4425   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4426   Mat_SeqAIJ     *mat,*a,*b;
4427   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4428   MatScalar      *aa,*ba,*cam;
4429   PetscScalar    *ca;
4430   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4431   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4432   PetscBool      match;
4433   MPI_Comm       comm;
4434   PetscMPIInt    size;
4435 
4436   PetscFunctionBegin;
4437   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4438   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4439   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4440   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4441   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4442 
4443   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4444   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4445   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4446   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4447   aa = a->a; ba = b->a;
4448   if (scall == MAT_INITIAL_MATRIX) {
4449     if (size == 1) {
4450       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4451       PetscFunctionReturn(0);
4452     }
4453 
4454     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4455     ci[0] = 0;
4456     for (i=0; i<am; i++) {
4457       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4458     }
4459     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4460     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4461     k    = 0;
4462     for (i=0; i<am; i++) {
4463       ncols_o = bi[i+1] - bi[i];
4464       ncols_d = ai[i+1] - ai[i];
4465       /* off-diagonal portion of A */
4466       for (jo=0; jo<ncols_o; jo++) {
4467         col = cmap[*bj];
4468         if (col >= cstart) break;
4469         cj[k]   = col; bj++;
4470         ca[k++] = *ba++;
4471       }
4472       /* diagonal portion of A */
4473       for (j=0; j<ncols_d; j++) {
4474         cj[k]   = cstart + *aj++;
4475         ca[k++] = *aa++;
4476       }
4477       /* off-diagonal portion of A */
4478       for (j=jo; j<ncols_o; j++) {
4479         cj[k]   = cmap[*bj++];
4480         ca[k++] = *ba++;
4481       }
4482     }
4483     /* put together the new matrix */
4484     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4485     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4486     /* Since these are PETSc arrays, change flags to free them as necessary. */
4487     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4488     mat->free_a  = PETSC_TRUE;
4489     mat->free_ij = PETSC_TRUE;
4490     mat->nonew   = 0;
4491   } else if (scall == MAT_REUSE_MATRIX) {
4492     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4493     ci = mat->i; cj = mat->j; cam = mat->a;
4494     for (i=0; i<am; i++) {
4495       /* off-diagonal portion of A */
4496       ncols_o = bi[i+1] - bi[i];
4497       for (jo=0; jo<ncols_o; jo++) {
4498         col = cmap[*bj];
4499         if (col >= cstart) break;
4500         *cam++ = *ba++; bj++;
4501       }
4502       /* diagonal portion of A */
4503       ncols_d = ai[i+1] - ai[i];
4504       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4505       /* off-diagonal portion of A */
4506       for (j=jo; j<ncols_o; j++) {
4507         *cam++ = *ba++; bj++;
4508       }
4509     }
4510   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4511   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4512   PetscFunctionReturn(0);
4513 }
4514 
4515 #undef __FUNCT__
4516 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4517 /*@C
4518      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4519 
4520     Not Collective
4521 
4522    Input Parameters:
4523 +    A - the matrix
4524 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4525 -    row, col - index sets of rows and columns to extract (or NULL)
4526 
4527    Output Parameter:
4528 .    A_loc - the local sequential matrix generated
4529 
4530     Level: developer
4531 
4532 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4533 
4534 @*/
4535 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4536 {
4537   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4538   PetscErrorCode ierr;
4539   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4540   IS             isrowa,iscola;
4541   Mat            *aloc;
4542   PetscBool      match;
4543 
4544   PetscFunctionBegin;
4545   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4546   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4547   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4548   if (!row) {
4549     start = A->rmap->rstart; end = A->rmap->rend;
4550     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4551   } else {
4552     isrowa = *row;
4553   }
4554   if (!col) {
4555     start = A->cmap->rstart;
4556     cmap  = a->garray;
4557     nzA   = a->A->cmap->n;
4558     nzB   = a->B->cmap->n;
4559     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4560     ncols = 0;
4561     for (i=0; i<nzB; i++) {
4562       if (cmap[i] < start) idx[ncols++] = cmap[i];
4563       else break;
4564     }
4565     imark = i;
4566     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4567     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4568     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4569   } else {
4570     iscola = *col;
4571   }
4572   if (scall != MAT_INITIAL_MATRIX) {
4573     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4574     aloc[0] = *A_loc;
4575   }
4576   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4577   *A_loc = aloc[0];
4578   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4579   if (!row) {
4580     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4581   }
4582   if (!col) {
4583     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4584   }
4585   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #undef __FUNCT__
4590 #define __FUNCT__ "MatGetBrowsOfAcols"
4591 /*@C
4592     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4593 
4594     Collective on Mat
4595 
4596    Input Parameters:
4597 +    A,B - the matrices in mpiaij format
4598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4599 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4600 
4601    Output Parameter:
4602 +    rowb, colb - index sets of rows and columns of B to extract
4603 -    B_seq - the sequential matrix generated
4604 
4605     Level: developer
4606 
4607 @*/
4608 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4609 {
4610   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4611   PetscErrorCode ierr;
4612   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4613   IS             isrowb,iscolb;
4614   Mat            *bseq=NULL;
4615 
4616   PetscFunctionBegin;
4617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4618     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4619   }
4620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4621 
4622   if (scall == MAT_INITIAL_MATRIX) {
4623     start = A->cmap->rstart;
4624     cmap  = a->garray;
4625     nzA   = a->A->cmap->n;
4626     nzB   = a->B->cmap->n;
4627     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4628     ncols = 0;
4629     for (i=0; i<nzB; i++) {  /* row < local row index */
4630       if (cmap[i] < start) idx[ncols++] = cmap[i];
4631       else break;
4632     }
4633     imark = i;
4634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4637     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4638   } else {
4639     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4640     isrowb  = *rowb; iscolb = *colb;
4641     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4642     bseq[0] = *B_seq;
4643   }
4644   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4645   *B_seq = bseq[0];
4646   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4647   if (!rowb) {
4648     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4649   } else {
4650     *rowb = isrowb;
4651   }
4652   if (!colb) {
4653     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4654   } else {
4655     *colb = iscolb;
4656   }
4657   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4658   PetscFunctionReturn(0);
4659 }
4660 
4661 #undef __FUNCT__
4662 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4663 /*
4664     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4665     of the OFF-DIAGONAL portion of local A
4666 
4667     Collective on Mat
4668 
4669    Input Parameters:
4670 +    A,B - the matrices in mpiaij format
4671 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4672 
4673    Output Parameter:
4674 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4675 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4676 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4677 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4678 
4679     Level: developer
4680 
4681 */
4682 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4683 {
4684   VecScatter_MPI_General *gen_to,*gen_from;
4685   PetscErrorCode         ierr;
4686   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4687   Mat_SeqAIJ             *b_oth;
4688   VecScatter             ctx =a->Mvctx;
4689   MPI_Comm               comm;
4690   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4691   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4692   PetscScalar            *rvalues,*svalues;
4693   MatScalar              *b_otha,*bufa,*bufA;
4694   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4695   MPI_Request            *rwaits = NULL,*swaits = NULL;
4696   MPI_Status             *sstatus,rstatus;
4697   PetscMPIInt            jj,size;
4698   PetscInt               *cols,sbs,rbs;
4699   PetscScalar            *vals;
4700 
4701   PetscFunctionBegin;
4702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4703   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4704   if (size == 1) PetscFunctionReturn(0);
4705 
4706   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4707     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4708   }
4709   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4711 
4712   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4713   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4714   rvalues  = gen_from->values; /* holds the length of receiving row */
4715   svalues  = gen_to->values;   /* holds the length of sending row */
4716   nrecvs   = gen_from->n;
4717   nsends   = gen_to->n;
4718 
4719   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4720   srow    = gen_to->indices;    /* local row index to be sent */
4721   sstarts = gen_to->starts;
4722   sprocs  = gen_to->procs;
4723   sstatus = gen_to->sstatus;
4724   sbs     = gen_to->bs;
4725   rstarts = gen_from->starts;
4726   rprocs  = gen_from->procs;
4727   rbs     = gen_from->bs;
4728 
4729   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4730   if (scall == MAT_INITIAL_MATRIX) {
4731     /* i-array */
4732     /*---------*/
4733     /*  post receives */
4734     for (i=0; i<nrecvs; i++) {
4735       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4736       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4737       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4738     }
4739 
4740     /* pack the outgoing message */
4741     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4742 
4743     sstartsj[0] = 0;
4744     rstartsj[0] = 0;
4745     len         = 0; /* total length of j or a array to be sent */
4746     k           = 0;
4747     for (i=0; i<nsends; i++) {
4748       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4749       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4750       for (j=0; j<nrows; j++) {
4751         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4752         for (l=0; l<sbs; l++) {
4753           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4754 
4755           rowlen[j*sbs+l] = ncols;
4756 
4757           len += ncols;
4758           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4759         }
4760         k++;
4761       }
4762       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4763 
4764       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4765     }
4766     /* recvs and sends of i-array are completed */
4767     i = nrecvs;
4768     while (i--) {
4769       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4770     }
4771     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4772 
4773     /* allocate buffers for sending j and a arrays */
4774     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4775     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4776 
4777     /* create i-array of B_oth */
4778     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4779 
4780     b_othi[0] = 0;
4781     len       = 0; /* total length of j or a array to be received */
4782     k         = 0;
4783     for (i=0; i<nrecvs; i++) {
4784       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4785       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4786       for (j=0; j<nrows; j++) {
4787         b_othi[k+1] = b_othi[k] + rowlen[j];
4788         len        += rowlen[j]; k++;
4789       }
4790       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4791     }
4792 
4793     /* allocate space for j and a arrrays of B_oth */
4794     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4795     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4796 
4797     /* j-array */
4798     /*---------*/
4799     /*  post receives of j-array */
4800     for (i=0; i<nrecvs; i++) {
4801       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4802       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4803     }
4804 
4805     /* pack the outgoing message j-array */
4806     k = 0;
4807     for (i=0; i<nsends; i++) {
4808       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4809       bufJ  = bufj+sstartsj[i];
4810       for (j=0; j<nrows; j++) {
4811         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4812         for (ll=0; ll<sbs; ll++) {
4813           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4814           for (l=0; l<ncols; l++) {
4815             *bufJ++ = cols[l];
4816           }
4817           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4818         }
4819       }
4820       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4821     }
4822 
4823     /* recvs and sends of j-array are completed */
4824     i = nrecvs;
4825     while (i--) {
4826       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4827     }
4828     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4829   } else if (scall == MAT_REUSE_MATRIX) {
4830     sstartsj = *startsj_s;
4831     rstartsj = *startsj_r;
4832     bufa     = *bufa_ptr;
4833     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4834     b_otha   = b_oth->a;
4835   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4836 
4837   /* a-array */
4838   /*---------*/
4839   /*  post receives of a-array */
4840   for (i=0; i<nrecvs; i++) {
4841     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4842     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4843   }
4844 
4845   /* pack the outgoing message a-array */
4846   k = 0;
4847   for (i=0; i<nsends; i++) {
4848     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4849     bufA  = bufa+sstartsj[i];
4850     for (j=0; j<nrows; j++) {
4851       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4852       for (ll=0; ll<sbs; ll++) {
4853         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4854         for (l=0; l<ncols; l++) {
4855           *bufA++ = vals[l];
4856         }
4857         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4858       }
4859     }
4860     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4861   }
4862   /* recvs and sends of a-array are completed */
4863   i = nrecvs;
4864   while (i--) {
4865     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4866   }
4867   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4868   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4869 
4870   if (scall == MAT_INITIAL_MATRIX) {
4871     /* put together the new matrix */
4872     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4873 
4874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4875     /* Since these are PETSc arrays, change flags to free them as necessary. */
4876     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4877     b_oth->free_a  = PETSC_TRUE;
4878     b_oth->free_ij = PETSC_TRUE;
4879     b_oth->nonew   = 0;
4880 
4881     ierr = PetscFree(bufj);CHKERRQ(ierr);
4882     if (!startsj_s || !bufa_ptr) {
4883       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4884       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4885     } else {
4886       *startsj_s = sstartsj;
4887       *startsj_r = rstartsj;
4888       *bufa_ptr  = bufa;
4889     }
4890   }
4891   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4892   PetscFunctionReturn(0);
4893 }
4894 
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatGetCommunicationStructs"
4897 /*@C
4898   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4899 
4900   Not Collective
4901 
4902   Input Parameters:
4903 . A - The matrix in mpiaij format
4904 
4905   Output Parameter:
4906 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4907 . colmap - A map from global column index to local index into lvec
4908 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4909 
4910   Level: developer
4911 
4912 @*/
4913 #if defined(PETSC_USE_CTABLE)
4914 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4915 #else
4916 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4917 #endif
4918 {
4919   Mat_MPIAIJ *a;
4920 
4921   PetscFunctionBegin;
4922   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4923   PetscValidPointer(lvec, 2);
4924   PetscValidPointer(colmap, 3);
4925   PetscValidPointer(multScatter, 4);
4926   a = (Mat_MPIAIJ*) A->data;
4927   if (lvec) *lvec = a->lvec;
4928   if (colmap) *colmap = a->colmap;
4929   if (multScatter) *multScatter = a->Mvctx;
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4934 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4935 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4936 #if defined(PETSC_HAVE_ELEMENTAL)
4937 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4938 #endif
4939 
4940 #undef __FUNCT__
4941 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4942 /*
4943     Computes (B'*A')' since computing B*A directly is untenable
4944 
4945                n                       p                          p
4946         (              )       (              )         (                  )
4947       m (      A       )  *  n (       B      )   =   m (         C        )
4948         (              )       (              )         (                  )
4949 
4950 */
4951 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4952 {
4953   PetscErrorCode ierr;
4954   Mat            At,Bt,Ct;
4955 
4956   PetscFunctionBegin;
4957   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4958   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4959   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4960   ierr = MatDestroy(&At);CHKERRQ(ierr);
4961   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4962   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4963   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 #undef __FUNCT__
4968 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4969 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4970 {
4971   PetscErrorCode ierr;
4972   PetscInt       m=A->rmap->n,n=B->cmap->n;
4973   Mat            Cmat;
4974 
4975   PetscFunctionBegin;
4976   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4977   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4978   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4979   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4980   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4981   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4982   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4983   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4984 
4985   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4986 
4987   *C = Cmat;
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 /* ----------------------------------------------------------------*/
4992 #undef __FUNCT__
4993 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4994 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4995 {
4996   PetscErrorCode ierr;
4997 
4998   PetscFunctionBegin;
4999   if (scall == MAT_INITIAL_MATRIX) {
5000     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5001     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5002     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5003   }
5004   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5005   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5006   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5007   PetscFunctionReturn(0);
5008 }
5009 
5010 #if defined(PETSC_HAVE_MUMPS)
5011 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5012 #endif
5013 #if defined(PETSC_HAVE_PASTIX)
5014 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5015 #endif
5016 #if defined(PETSC_HAVE_SUPERLU_DIST)
5017 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5018 #endif
5019 #if defined(PETSC_HAVE_CLIQUE)
5020 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5021 #endif
5022 
5023 /*MC
5024    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5025 
5026    Options Database Keys:
5027 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5028 
5029   Level: beginner
5030 
5031 .seealso: MatCreateAIJ()
5032 M*/
5033 
5034 #undef __FUNCT__
5035 #define __FUNCT__ "MatCreate_MPIAIJ"
5036 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5037 {
5038   Mat_MPIAIJ     *b;
5039   PetscErrorCode ierr;
5040   PetscMPIInt    size;
5041 
5042   PetscFunctionBegin;
5043   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5044 
5045   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5046   B->data       = (void*)b;
5047   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5048   B->assembled  = PETSC_FALSE;
5049   B->insertmode = NOT_SET_VALUES;
5050   b->size       = size;
5051 
5052   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5053 
5054   /* build cache for off array entries formed */
5055   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5056 
5057   b->donotstash  = PETSC_FALSE;
5058   b->colmap      = 0;
5059   b->garray      = 0;
5060   b->roworiented = PETSC_TRUE;
5061 
5062   /* stuff used for matrix vector multiply */
5063   b->lvec  = NULL;
5064   b->Mvctx = NULL;
5065 
5066   /* stuff for MatGetRow() */
5067   b->rowindices   = 0;
5068   b->rowvalues    = 0;
5069   b->getrowactive = PETSC_FALSE;
5070 
5071   /* flexible pointer used in CUSP/CUSPARSE classes */
5072   b->spptr = NULL;
5073 
5074 #if defined(PETSC_HAVE_MUMPS)
5075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_PASTIX)
5078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5079 #endif
5080 #if defined(PETSC_HAVE_SUPERLU_DIST)
5081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5082 #endif
5083 #if defined(PETSC_HAVE_CLIQUE)
5084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5085 #endif
5086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5096 #if defined(PETSC_HAVE_ELEMENTAL)
5097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5098 #endif
5099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5103   PetscFunctionReturn(0);
5104 }
5105 
5106 #undef __FUNCT__
5107 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5108 /*@
5109      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5110          and "off-diagonal" part of the matrix in CSR format.
5111 
5112    Collective on MPI_Comm
5113 
5114    Input Parameters:
5115 +  comm - MPI communicator
5116 .  m - number of local rows (Cannot be PETSC_DECIDE)
5117 .  n - This value should be the same as the local size used in creating the
5118        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5119        calculated if N is given) For square matrices n is almost always m.
5120 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5121 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5122 .   i - row indices for "diagonal" portion of matrix
5123 .   j - column indices
5124 .   a - matrix values
5125 .   oi - row indices for "off-diagonal" portion of matrix
5126 .   oj - column indices
5127 -   oa - matrix values
5128 
5129    Output Parameter:
5130 .   mat - the matrix
5131 
5132    Level: advanced
5133 
5134    Notes:
5135        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5136        must free the arrays once the matrix has been destroyed and not before.
5137 
5138        The i and j indices are 0 based
5139 
5140        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5141 
5142        This sets local rows and cannot be used to set off-processor values.
5143 
5144        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5145        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5146        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5147        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5148        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5149        communication if it is known that only local entries will be set.
5150 
5151 .keywords: matrix, aij, compressed row, sparse, parallel
5152 
5153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5154           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5155 @*/
5156 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5157 {
5158   PetscErrorCode ierr;
5159   Mat_MPIAIJ     *maij;
5160 
5161   PetscFunctionBegin;
5162   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5163   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5164   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5165   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5166   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5167   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5168   maij = (Mat_MPIAIJ*) (*mat)->data;
5169 
5170   (*mat)->preallocated = PETSC_TRUE;
5171 
5172   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5173   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5174 
5175   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5176   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5177 
5178   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5179   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5180   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5181   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5182 
5183   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5184   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5185   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5186   PetscFunctionReturn(0);
5187 }
5188 
5189 /*
5190     Special version for direct calls from Fortran
5191 */
5192 #include <petsc-private/fortranimpl.h>
5193 
5194 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5195 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5196 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5197 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5198 #endif
5199 
5200 /* Change these macros so can be used in void function */
5201 #undef CHKERRQ
5202 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5203 #undef SETERRQ2
5204 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5205 #undef SETERRQ3
5206 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5207 #undef SETERRQ
5208 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5209 
5210 #undef __FUNCT__
5211 #define __FUNCT__ "matsetvaluesmpiaij_"
5212 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5213 {
5214   Mat            mat  = *mmat;
5215   PetscInt       m    = *mm, n = *mn;
5216   InsertMode     addv = *maddv;
5217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5218   PetscScalar    value;
5219   PetscErrorCode ierr;
5220 
5221   MatCheckPreallocated(mat,1);
5222   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5223 
5224 #if defined(PETSC_USE_DEBUG)
5225   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5226 #endif
5227   {
5228     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5229     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5230     PetscBool roworiented = aij->roworiented;
5231 
5232     /* Some Variables required in the macro */
5233     Mat        A                 = aij->A;
5234     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5235     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5236     MatScalar  *aa               = a->a;
5237     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5238     Mat        B                 = aij->B;
5239     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5240     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5241     MatScalar  *ba               = b->a;
5242 
5243     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5244     PetscInt  nonew = a->nonew;
5245     MatScalar *ap1,*ap2;
5246 
5247     PetscFunctionBegin;
5248     for (i=0; i<m; i++) {
5249       if (im[i] < 0) continue;
5250 #if defined(PETSC_USE_DEBUG)
5251       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5252 #endif
5253       if (im[i] >= rstart && im[i] < rend) {
5254         row      = im[i] - rstart;
5255         lastcol1 = -1;
5256         rp1      = aj + ai[row];
5257         ap1      = aa + ai[row];
5258         rmax1    = aimax[row];
5259         nrow1    = ailen[row];
5260         low1     = 0;
5261         high1    = nrow1;
5262         lastcol2 = -1;
5263         rp2      = bj + bi[row];
5264         ap2      = ba + bi[row];
5265         rmax2    = bimax[row];
5266         nrow2    = bilen[row];
5267         low2     = 0;
5268         high2    = nrow2;
5269 
5270         for (j=0; j<n; j++) {
5271           if (roworiented) value = v[i*n+j];
5272           else value = v[i+j*m];
5273           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5274           if (in[j] >= cstart && in[j] < cend) {
5275             col = in[j] - cstart;
5276             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5277           } else if (in[j] < 0) continue;
5278 #if defined(PETSC_USE_DEBUG)
5279           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5280 #endif
5281           else {
5282             if (mat->was_assembled) {
5283               if (!aij->colmap) {
5284                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5285               }
5286 #if defined(PETSC_USE_CTABLE)
5287               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5288               col--;
5289 #else
5290               col = aij->colmap[in[j]] - 1;
5291 #endif
5292               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5293                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5294                 col  =  in[j];
5295                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5296                 B     = aij->B;
5297                 b     = (Mat_SeqAIJ*)B->data;
5298                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5299                 rp2   = bj + bi[row];
5300                 ap2   = ba + bi[row];
5301                 rmax2 = bimax[row];
5302                 nrow2 = bilen[row];
5303                 low2  = 0;
5304                 high2 = nrow2;
5305                 bm    = aij->B->rmap->n;
5306                 ba    = b->a;
5307               }
5308             } else col = in[j];
5309             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5310           }
5311         }
5312       } else if (!aij->donotstash) {
5313         if (roworiented) {
5314           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5315         } else {
5316           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5317         }
5318       }
5319     }
5320   }
5321   PetscFunctionReturnVoid();
5322 }
5323 
5324