xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision aae456db49e72798f90b49c19f8e8ffd0b71bcb0)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically switches over to using inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it, it is not scalable (each processor
367 has an order N integer array) but is fast to access.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at local (row,col) of the
   block whose current row is described by rp1 (column indices) and ap1 (values).

   The macro is not self-contained: it reads and writes these names from the enclosing
   scope: rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N, nonew,
   ignorezeroentries, a, A, am, aa, ai, aj, aimax and ailen. It also defines the label
   a_noinsert, so it can be expanded only once per function.

   Steps:
     - low1/high1 delimit a search window carried over between calls for the same row;
       the lower bound is reset when col <= lastcol1, otherwise the upper bound is reset.
     - the window is bisected until at most 5 entries remain, then scanned linearly
       (presumably rp1 is sorted ascending -- the scan stops at the first index > col).
     - an existing entry is updated in place (+= for ADD_VALUES, = otherwise).
     - otherwise the value is skipped when it is zero and ignorezeroentries is set, or
       when nonew == 1; nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE.
     - otherwise MatSeqXAIJReallocateAIJ() is invoked (defined elsewhere; presumably it
       grows the storage when the row is full), later entries are shifted up by one and
       the new (col,value) pair is stored at position _i.
     - ailen[row] is always updated to nrow1 at the end.
*/
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
      /* not found: the window is reset to the whole row on the two silent-skip paths */ \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      /* N is the index of the last existing entry; the row length, nz count and window grow by one */ \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
/*
   MatSetValues_SeqAIJ_B_Private - Same insertion logic as the A-block macro, applied to
   the block whose current row is described by rp2 (column indices) and ap2 (values).

   Names taken from the enclosing scope: rp2, ap2, nrow2, rmax2, low2, high2, lastcol2,
   t, _i, ii, N, nonew, ignorezeroentries, b, B, bm, ba, bi, bj, bimax and bilen. The
   macro defines the label b_noinsert, so it can be expanded only once per function.

   An existing entry at col is updated in place. A missing entry is skipped when the
   value is zero and ignorezeroentries is set or when nonew == 1, raises
   PETSC_ERR_ARG_OUTOFRANGE when nonew == -1, and is otherwise inserted after calling
   MatSeqXAIJReallocateAIJ() (defined elsewhere; presumably grows storage when needed).
   bilen[row] is always updated to nrow2 at the end.
*/
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
    /* bisect until at most 5 candidates remain, then scan them linearly */ \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    /* N is the index of the last existing entry; the row length, nz count and window grow by one */ \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m x n block of values into the matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m,im - number of rows and their global indices; negative row indices are skipped
.  n,in - number of columns and their global indices; negative column indices are skipped
.  v    - the values, read as v[i*n+j] when aij->roworiented is set and as v[i+j*m] otherwise
-  addv - ADD_VALUES or INSERT_VALUES

   Locally owned rows are inserted directly: columns inside [cstart,cend) go to the
   diagonal block aij->A, all others to the off-diagonal block aij->B. Rows owned by
   another process are placed in mat->stash unless aij->donotstash is set, and raise an
   error when mat->nooffprocentries is set.

   The long list of locals below exists because the two MatSetValues_SeqAIJ_*_Private
   macros refer to these exact names.
*/
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by both insertion macros */
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
        /* adding an explicit zero is a no-op when zero entries are being ignored */
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block; it uses local column numbering */
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
          /* off-diagonal block: after a previous assembly its columns are looked up through colmap */
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
            /* col < 0 means the global column is not yet present in B */
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
      /* row owned by another process: stash it for communication during assembly */
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of off process part of matrix zeroing removed columns*/
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added in yy until the next line, */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_Redundant"
1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Redundant  *redund = *redundant;
1125   PetscInt       i;
1126 
1127   PetscFunctionBegin;
1128   *redundant = NULL;
1129   if (redund){
1130     if (redund->matseq) { /* via MatGetSubMatrices()  */
1131       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1132       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1133       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1135     } else {
1136       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1137       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1138       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1139       for (i=0; i<redund->nrecvs; i++) {
1140         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1141         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1142       }
1143       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1144     }
1145 
1146     if (redund->subcomm) {
1147       ierr = PetscCommDestroy(&redund->subcomm);CHKERRQ(ierr);
1148     }
1149     ierr = PetscFree(redund);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined(PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1191 #if defined(PETSC_HAVE_ELEMENTAL)
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1193 #endif
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1204   PetscErrorCode ierr;
1205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1206   int            fd;
1207   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1208   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1209   PetscScalar    *column_values;
1210   PetscInt       message_count,flowcontrolcount;
1211   FILE           *file;
1212 
1213   PetscFunctionBegin;
1214   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1216   nz   = A->nz + B->nz;
1217   if (!rank) {
1218     header[0] = MAT_FILE_CLASSID;
1219     header[1] = mat->rmap->N;
1220     header[2] = mat->cmap->N;
1221 
1222     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1224     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     /* get largest number of rows any processor has */
1226     rlen  = mat->rmap->n;
1227     range = mat->rmap->range;
1228     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1229   } else {
1230     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     rlen = mat->rmap->n;
1232   }
1233 
1234   /* load up the local row counts */
1235   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1236   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1251     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* th processor needs space a largest processor needs */
1258   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1260   cnt   = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ((col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1267     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column indices to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1280       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1291 
1292   /* load up the local column values */
1293   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1294   cnt  = 0;
1295   for (i=0; i<mat->rmap->n; i++) {
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if (garray[B->j[j]] > cstart) break;
1298       column_values[cnt++] = B->a[j];
1299     }
1300     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1301     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1302   }
1303   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1304 
1305   /* store the column values to the file */
1306   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1307   if (!rank) {
1308     MPI_Status status;
1309     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     for (i=1; i<size; i++) {
1311       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1312       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1313       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1314       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(column_values);CHKERRQ(ierr);
1325 
1326   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1327   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #include <petscdraw.h>
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1334 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1335 {
1336   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1337   PetscErrorCode    ierr;
1338   PetscMPIInt       rank = aij->rank,size = aij->size;
1339   PetscBool         isdraw,iascii,isbinary;
1340   PetscViewer       sviewer;
1341   PetscViewerFormat format;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1346   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1347   if (iascii) {
1348     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1349     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1350       MatInfo   info;
1351       PetscBool inodes;
1352 
1353       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1354       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1357       if (!inodes) {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1360       } else {
1361         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1362                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1363       }
1364       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1366       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1368       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1371       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1374       PetscInt inodecount,inodelimit,*inodes;
1375       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1376       if (inodes) {
1377         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1378       } else {
1379         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1380       }
1381       PetscFunctionReturn(0);
1382     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1383       PetscFunctionReturn(0);
1384     }
1385   } else if (isbinary) {
1386     if (size == 1) {
1387       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1388       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1389     } else {
1390       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1391     }
1392     PetscFunctionReturn(0);
1393   } else if (isdraw) {
1394     PetscDraw draw;
1395     PetscBool isnull;
1396     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1397     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1398   }
1399 
1400   {
1401     /* assemble the entire matrix onto first processor. */
1402     Mat        A;
1403     Mat_SeqAIJ *Aloc;
1404     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1405     MatScalar  *a;
1406 
1407     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1408     if (!rank) {
1409       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1410     } else {
1411       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1412     }
1413     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1414     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1415     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1416     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1417     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1418 
1419     /* copy over the A part */
1420     Aloc = (Mat_SeqAIJ*)aij->A->data;
1421     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1422     row  = mat->rmap->rstart;
1423     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1424     for (i=0; i<m; i++) {
1425       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1426       row++;
1427       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1428     }
1429     aj = Aloc->j;
1430     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1431 
1432     /* copy over the B part */
1433     Aloc = (Mat_SeqAIJ*)aij->B->data;
1434     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1435     row  = mat->rmap->rstart;
1436     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1437     ct   = cols;
1438     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++;
1442       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443     }
1444     ierr = PetscFree(ct);CHKERRQ(ierr);
1445     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     /*
1448        Everyone has to call to draw the matrix since the graphics waits are
1449        synchronized across all processors that share the PetscDraw object
1450     */
1451     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     if (!rank) {
1453       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1454     }
1455     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1456     ierr = MatDestroy(&A);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatView_MPIAIJ"
1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1464 {
1465   PetscErrorCode ierr;
1466   PetscBool      iascii,isdraw,issocket,isbinary;
1467 
1468   PetscFunctionBegin;
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1473   if (iascii || isdraw || isbinary || issocket) {
1474     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSOR_MPIAIJ"
1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1482 {
1483   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1484   PetscErrorCode ierr;
1485   Vec            bb1 = 0;
1486   PetscBool      hasop;
1487 
1488   PetscFunctionBegin;
1489   if (flag == SOR_APPLY_UPPER) {
1490     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491     PetscFunctionReturn(0);
1492   }
1493 
1494   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1495     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1496   }
1497 
1498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503 
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1532     if (flag & SOR_ZERO_INITIAL_GUESS) {
1533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534       its--;
1535     }
1536     while (its--) {
1537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539 
1540       /* update rhs: bb1 = bb - B*x */
1541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1543 
1544       /* local sweep */
1545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1546     }
1547   } else if (flag & SOR_EISENSTAT) {
1548     Vec xx1;
1549 
1550     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1552 
1553     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555     if (!mat->diag) {
1556       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1557       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1558     }
1559     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1560     if (hasop) {
1561       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1562     } else {
1563       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1564     }
1565     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1566 
1567     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1568 
1569     /* local sweep */
1570     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1571     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1572     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1573   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1574 
1575   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatPermute_MPIAIJ"
1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1582 {
1583   Mat            aA,aB,Aperm;
1584   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1585   PetscScalar    *aa,*ba;
1586   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1587   PetscSF        rowsf,sf;
1588   IS             parcolp = NULL;
1589   PetscBool      done;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1594   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1595   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1596   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1597 
1598   /* Invert row permutation to find out where my rows should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1602   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1603   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1605 
1606   /* Invert column permutation to find out where my columns should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1610   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1611   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1616   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1617   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1618 
1619   /* Find out where my gcols should go */
1620   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1621   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1623   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1624   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1625   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1627   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1628 
1629   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1631   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) {
1633     PetscInt row = rdest[i],rowner;
1634     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1635     for (j=ai[i]; j<ai[i+1]; j++) {
1636       PetscInt cowner,col = cdest[aj[j]];
1637       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1638       if (rowner == cowner) dnnz[i]++;
1639       else onnz[i]++;
1640     }
1641     for (j=bi[i]; j<bi[i+1]; j++) {
1642       PetscInt cowner,col = gcdest[bj[j]];
1643       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1644       if (rowner == cowner) dnnz[i]++;
1645       else onnz[i]++;
1646     }
1647   }
1648   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1652   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1653 
1654   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1659     PetscInt j0,rowlen;
1660     rowlen = ai[i+1] - ai[i];
1661     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1662       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1663       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     rowlen = bi[i+1] - bi[i];
1666     for (j0=j=0; j<rowlen; j0=j) {
1667       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1668       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1669     }
1670   }
1671   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1674   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1676   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1677   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1678   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1679   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1680   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1681   *B = Aperm;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1688 {
1689   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1690   Mat            A    = mat->A,B = mat->B;
1691   PetscErrorCode ierr;
1692   PetscReal      isend[5],irecv[5];
1693 
1694   PetscFunctionBegin;
1695   info->block_size = 1.0;
1696   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1697 
1698   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1699   isend[3] = info->memory;  isend[4] = info->mallocs;
1700 
1701   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1704   isend[3] += info->memory;  isend[4] += info->mallocs;
1705   if (flag == MAT_LOCAL) {
1706     info->nz_used      = isend[0];
1707     info->nz_allocated = isend[1];
1708     info->nz_unneeded  = isend[2];
1709     info->memory       = isend[3];
1710     info->mallocs      = isend[4];
1711   } else if (flag == MAT_GLOBAL_MAX) {
1712     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1713 
1714     info->nz_used      = irecv[0];
1715     info->nz_allocated = irecv[1];
1716     info->nz_unneeded  = irecv[2];
1717     info->memory       = irecv[3];
1718     info->mallocs      = irecv[4];
1719   } else if (flag == MAT_GLOBAL_SUM) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   }
1728   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1729   info->fill_ratio_needed = 0;
1730   info->factor_mallocs    = 0;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetOption_MPIAIJ"
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_STRUCTURALLY_SYMMETRIC:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_HERMITIAN:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_SYMMETRY_ETERNAL:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   default:
1789     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatGetRow_MPIAIJ"
1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1797 {
1798   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1799   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1800   PetscErrorCode ierr;
1801   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1802   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1803   PetscInt       *cmap,*idx_p;
1804 
1805   PetscFunctionBegin;
1806   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1807   mat->getrowactive = PETSC_TRUE;
1808 
1809   if (!mat->rowvalues && (idx || v)) {
1810     /*
1811         allocate enough space to hold information from the longest row.
1812     */
1813     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1814     PetscInt   max = 1,tmp;
1815     for (i=0; i<matin->rmap->n; i++) {
1816       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1817       if (max < tmp) max = tmp;
1818     }
1819     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1820   }
1821 
1822   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1823   lrow = row - rstart;
1824 
1825   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1826   if (!v)   {pvA = 0; pvB = 0;}
1827   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1828   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1829   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1830   nztot = nzA + nzB;
1831 
1832   cmap = mat->garray;
1833   if (v  || idx) {
1834     if (nztot) {
1835       /* Sort by increasing column numbers, assuming A and B already sorted */
1836       PetscInt imark = -1;
1837       if (v) {
1838         *v = v_p = mat->rowvalues;
1839         for (i=0; i<nzB; i++) {
1840           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1841           else break;
1842         }
1843         imark = i;
1844         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1845         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1846       }
1847       if (idx) {
1848         *idx = idx_p = mat->rowindices;
1849         if (imark > -1) {
1850           for (i=0; i<imark; i++) {
1851             idx_p[i] = cmap[cworkB[i]];
1852           }
1853         } else {
1854           for (i=0; i<nzB; i++) {
1855             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1856             else break;
1857           }
1858           imark = i;
1859         }
1860         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1861         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1862       }
1863     } else {
1864       if (idx) *idx = 0;
1865       if (v)   *v   = 0;
1866     }
1867   }
1868   *nz  = nztot;
1869   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1877 {
1878   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1879 
1880   PetscFunctionBegin;
1881   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1882   aij->getrowactive = PETSC_FALSE;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatNorm_MPIAIJ"
1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1889 {
1890   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1891   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1892   PetscErrorCode ierr;
1893   PetscInt       i,j,cstart = mat->cmap->rstart;
1894   PetscReal      sum = 0.0;
1895   MatScalar      *v;
1896 
1897   PetscFunctionBegin;
1898   if (aij->size == 1) {
1899     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1900   } else {
1901     if (type == NORM_FROBENIUS) {
1902       v = amat->a;
1903       for (i=0; i<amat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       v = bmat->a;
1907       for (i=0; i<bmat->nz; i++) {
1908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1909       }
1910       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911       *norm = PetscSqrtReal(*norm);
1912     } else if (type == NORM_1) { /* max column norm */
1913       PetscReal *tmp,*tmp2;
1914       PetscInt  *jj,*garray = aij->garray;
1915       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1916       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1917       *norm = 0.0;
1918       v     = amat->a; jj = amat->j;
1919       for (j=0; j<amat->nz; j++) {
1920         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1921       }
1922       v = bmat->a; jj = bmat->j;
1923       for (j=0; j<bmat->nz; j++) {
1924         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1925       }
1926       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927       for (j=0; j<mat->cmap->N; j++) {
1928         if (tmp2[j] > *norm) *norm = tmp2[j];
1929       }
1930       ierr = PetscFree(tmp);CHKERRQ(ierr);
1931       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1932     } else if (type == NORM_INFINITY) { /* max row norm */
1933       PetscReal ntemp = 0.0;
1934       for (j=0; j<aij->A->rmap->n; j++) {
1935         v   = amat->a + amat->i[j];
1936         sum = 0.0;
1937         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         v = bmat->a + bmat->i[j];
1941         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1942           sum += PetscAbsScalar(*v); v++;
1943         }
1944         if (sum > ntemp) ntemp = sum;
1945       }
1946       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1947     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatTranspose_MPIAIJ"
1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1955 {
1956   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1957   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1958   PetscErrorCode ierr;
1959   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1960   PetscInt       cstart = A->cmap->rstart,ncol;
1961   Mat            B;
1962   MatScalar      *array;
1963 
1964   PetscFunctionBegin;
1965   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1966 
1967   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1968   ai = Aloc->i; aj = Aloc->j;
1969   bi = Bloc->i; bj = Bloc->j;
1970   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1971     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1972     PetscSFNode          *oloc;
1973     PETSC_UNUSED PetscSF sf;
1974 
1975     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1976     /* compute d_nnz for preallocation */
1977     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1978     for (i=0; i<ai[ma]; i++) {
1979       d_nnz[aj[i]]++;
1980       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1981     }
1982     /* compute local off-diagonal contributions */
1983     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1984     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1985     /* map those to global */
1986     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1987     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1988     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1989     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1990     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1991     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1992     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1993 
1994     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1995     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1996     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1997     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1998     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1999     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2000   } else {
2001     B    = *matout;
2002     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2003     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2004   }
2005 
2006   /* copy over the A part */
2007   array = Aloc->a;
2008   row   = A->rmap->rstart;
2009   for (i=0; i<ma; i++) {
2010     ncol = ai[i+1]-ai[i];
2011     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2012     row++;
2013     array += ncol; aj += ncol;
2014   }
2015   aj = Aloc->j;
2016   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2017 
2018   /* copy over the B part */
2019   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2020   array = Bloc->a;
2021   row   = A->rmap->rstart;
2022   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2023   cols_tmp = cols;
2024   for (i=0; i<mb; i++) {
2025     ncol = bi[i+1]-bi[i];
2026     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2027     row++;
2028     array += ncol; cols_tmp += ncol;
2029   }
2030   ierr = PetscFree(cols);CHKERRQ(ierr);
2031 
2032   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2034   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2035     *matout = B;
2036   } else {
2037     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2045 {
2046   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2047   Mat            a    = aij->A,b = aij->B;
2048   PetscErrorCode ierr;
2049   PetscInt       s1,s2,s3;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2053   if (rr) {
2054     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2055     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2056     /* Overlap communication with computation. */
2057     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2058   }
2059   if (ll) {
2060     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2061     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2062     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2063   }
2064   /* scale  the diagonal block */
2065   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2066 
2067   if (rr) {
2068     /* Do a scatter end and then right scale the off-diagonal block */
2069     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2078 {
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatEqual_MPIAIJ"
2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2090 {
2091   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2092   Mat            a,b,c,d;
2093   PetscBool      flg;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   a = matA->A; b = matA->B;
2098   c = matB->A; d = matB->B;
2099 
2100   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2101   if (flg) {
2102     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2103   }
2104   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatCopy_MPIAIJ"
2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2111 {
2112   PetscErrorCode ierr;
2113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2114   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2115 
2116   PetscFunctionBegin;
2117   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2118   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2119     /* because of the column compression in the off-processor part of the matrix a->B,
2120        the number of columns in a->B and b->B may be different, hence we cannot call
2121        the MatCopy() directly on the two parts. If need be, we can provide a more
2122        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2123        then copying the submatrices */
2124     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2125   } else {
2126     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2127     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatSetUp_MPIAIJ"
2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2135 {
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /*
2144    Computes the number of nonzeros per row needed for preallocation when X and Y
2145    have different nonzero structure.
2146 */
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2150 {
2151   PetscInt       i,j,k,nzx,nzy;
2152 
2153   PetscFunctionBegin;
2154   /* Set the number of nonzeros in the new matrix */
2155   for (i=0; i<m; i++) {
2156     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2157     nzx = xi[i+1] - xi[i];
2158     nzy = yi[i+1] - yi[i];
2159     nnz[i] = 0;
2160     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2161       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2162       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2163       nnz[i]++;
2164     }
2165     for (; k<nzy; k++) nnz[i]++;
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2174 {
2175   PetscErrorCode ierr;
2176   PetscInt       m = Y->rmap->N;
2177   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2178   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2179 
2180   PetscFunctionBegin;
2181   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ*)xx->B->data;
2211     y = (Mat_SeqAIJ*)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2223   } else {
2224     Mat      B;
2225     PetscInt *nnz_d,*nnz_o;
2226     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2227     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2228     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2229     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2230     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2231     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2232     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #if defined(PETSC_HAVE_PBGL)
2290 
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map->hpp>
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306   namespace petsc = boost::distributed::petsc;
2307 
2308   namespace graph_dist = boost::graph::distributed;
2309   using boost::graph::distributed::ilu_default::process_group_type;
2310   using boost::graph::ilu_permuted;
2311 
2312   PetscBool      row_identity, col_identity;
2313   PetscContainer c;
2314   PetscInt       m, n, M, N;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type& level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2338   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 
2342   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2343   ierr = PetscContainerSetPointer(c, lgraph_p);
2344   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2345   ierr = PetscContainerDestroy(&c);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364   namespace graph_dist = boost::graph::distributed;
2365 
2366   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2367   lgraph_type    *lgraph_p;
2368   PetscContainer c;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2373   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2374   ierr = VecCopy(b, x);CHKERRQ(ierr);
2375 
2376   PetscScalar *array_x;
2377   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378   PetscInt sx;
2379   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380 
2381   PetscScalar *array_b;
2382   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383   PetscInt sb;
2384   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385 
2386   lgraph_type& level_graph = *lgraph_p;
2387   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2388 
2389   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2391   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2392 
2393   typedef boost::iterator_property_map<array_ref_type::iterator,
2394                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2395   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2396   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397 
2398   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2405 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2406 {
2407   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2408   PetscErrorCode ierr;
2409   PetscInt       i,*idxb = 0;
2410   PetscScalar    *va,*vb;
2411   Vec            vtmp;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2415   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2416   if (idx) {
2417     for (i=0; i<A->rmap->n; i++) {
2418       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2419     }
2420   }
2421 
2422   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2423   if (idx) {
2424     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2425   }
2426   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2427   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2428 
2429   for (i=0; i<A->rmap->n; i++) {
2430     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2431       va[i] = vb[i];
2432       if (idx) idx[i] = a->garray[idxb[i]];
2433     }
2434   }
2435 
2436   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2437   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2438   ierr = PetscFree(idxb);CHKERRQ(ierr);
2439   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->cmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++) {
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   ierr = PetscFree(idxb);CHKERRQ(ierr);
2479   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2485 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2486 {
2487   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2488   PetscInt       n      = A->rmap->n;
2489   PetscInt       cstart = A->cmap->rstart;
2490   PetscInt       *cmap  = mat->garray;
2491   PetscInt       *diagIdx, *offdiagIdx;
2492   Vec            diagV, offdiagV;
2493   PetscScalar    *a, *diagA, *offdiagA;
2494   PetscInt       r;
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2500   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2502   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2503   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   for (r = 0; r < n; ++r) {
2507     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2508       a[r]   = diagA[r];
2509       idx[r] = cstart + diagIdx[r];
2510     } else {
2511       a[r]   = offdiagA[r];
2512       idx[r] = cmap[offdiagIdx[r]];
2513     }
2514   }
2515   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2517   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2518   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2519   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2520   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2526 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2527 {
2528   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2529   PetscInt       n      = A->rmap->n;
2530   PetscInt       cstart = A->cmap->rstart;
2531   PetscInt       *cmap  = mat->garray;
2532   PetscInt       *diagIdx, *offdiagIdx;
2533   Vec            diagV, offdiagV;
2534   PetscScalar    *a, *diagA, *offdiagA;
2535   PetscInt       r;
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2541   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2542   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2543   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2544   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2545   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2546   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2547   for (r = 0; r < n; ++r) {
2548     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2549       a[r]   = diagA[r];
2550       idx[r] = cstart + diagIdx[r];
2551     } else {
2552       a[r]   = offdiagA[r];
2553       idx[r] = cmap[offdiagIdx[r]];
2554     }
2555   }
2556   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2558   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2559   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2560   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2561   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2567 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2568 {
2569   PetscErrorCode ierr;
2570   Mat            *dummy;
2571 
2572   PetscFunctionBegin;
2573   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2574   *newmat = *dummy;
2575   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2581 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2582 {
2583   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2584   PetscErrorCode ierr;
2585 
2586   PetscFunctionBegin;
2587   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2593 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2594 {
2595   PetscErrorCode ierr;
2596   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2600   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2601   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2602   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 /* -------------------------------------------------------------------*/
2607 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2608                                        MatGetRow_MPIAIJ,
2609                                        MatRestoreRow_MPIAIJ,
2610                                        MatMult_MPIAIJ,
2611                                 /* 4*/ MatMultAdd_MPIAIJ,
2612                                        MatMultTranspose_MPIAIJ,
2613                                        MatMultTransposeAdd_MPIAIJ,
2614 #if defined(PETSC_HAVE_PBGL)
2615                                        MatSolve_MPIAIJ,
2616 #else
2617                                        0,
2618 #endif
2619                                        0,
2620                                        0,
2621                                 /*10*/ 0,
2622                                        0,
2623                                        0,
2624                                        MatSOR_MPIAIJ,
2625                                        MatTranspose_MPIAIJ,
2626                                 /*15*/ MatGetInfo_MPIAIJ,
2627                                        MatEqual_MPIAIJ,
2628                                        MatGetDiagonal_MPIAIJ,
2629                                        MatDiagonalScale_MPIAIJ,
2630                                        MatNorm_MPIAIJ,
2631                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2632                                        MatAssemblyEnd_MPIAIJ,
2633                                        MatSetOption_MPIAIJ,
2634                                        MatZeroEntries_MPIAIJ,
2635                                 /*24*/ MatZeroRows_MPIAIJ,
2636                                        0,
2637 #if defined(PETSC_HAVE_PBGL)
2638                                        0,
2639 #else
2640                                        0,
2641 #endif
2642                                        0,
2643                                        0,
2644                                 /*29*/ MatSetUp_MPIAIJ,
2645 #if defined(PETSC_HAVE_PBGL)
2646                                        0,
2647 #else
2648                                        0,
2649 #endif
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*34*/ MatDuplicate_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*39*/ MatAXPY_MPIAIJ,
2659                                        MatGetSubMatrices_MPIAIJ,
2660                                        MatIncreaseOverlap_MPIAIJ,
2661                                        MatGetValues_MPIAIJ,
2662                                        MatCopy_MPIAIJ,
2663                                 /*44*/ MatGetRowMax_MPIAIJ,
2664                                        MatScale_MPIAIJ,
2665                                        0,
2666                                        MatDiagonalSet_MPIAIJ,
2667                                        MatZeroRowsColumns_MPIAIJ,
2668                                 /*49*/ MatSetRandom_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2674                                        0,
2675                                        MatSetUnfactored_MPIAIJ,
2676                                        MatPermute_MPIAIJ,
2677                                        0,
2678                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2679                                        MatDestroy_MPIAIJ,
2680                                        MatView_MPIAIJ,
2681                                        0,
2682                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2683                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2684                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2689                                        MatGetRowMinAbs_MPIAIJ,
2690                                        0,
2691                                        MatSetColoring_MPIAIJ,
2692                                        0,
2693                                        MatSetValuesAdifor_MPIAIJ,
2694                                 /*75*/ MatFDColoringApply_AIJ,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        MatFindZeroDiagonals_MPIAIJ,
2699                                 /*80*/ 0,
2700                                        0,
2701                                        0,
2702                                 /*83*/ MatLoad_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2709                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2710                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2711                                        MatPtAP_MPIAIJ_MPIAIJ,
2712                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2713                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                 /*99*/ 0,
2719                                        0,
2720                                        0,
2721                                        MatConjugate_MPIAIJ,
2722                                        0,
2723                                 /*104*/MatSetValuesRow_MPIAIJ,
2724                                        MatRealPart_MPIAIJ,
2725                                        MatImaginaryPart_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                 /*109*/0,
2729                                        0,
2730                                        MatGetRowMin_MPIAIJ,
2731                                        0,
2732                                        0,
2733                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                 /*119*/0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                        MatGetMultiProcBlock_MPIAIJ,
2743                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2744                                        MatGetColumnNorms_MPIAIJ,
2745                                        MatInvertBlockDiagonal_MPIAIJ,
2746                                        0,
2747                                        MatGetSubMatricesParallel_MPIAIJ,
2748                                 /*129*/0,
2749                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2750                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2751                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2752                                        0,
2753                                 /*134*/0,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                        0,
2758                                 /*139*/0,
2759                                        0,
2760                                        0,
2761                                        MatFDColoringSetUp_MPIXAIJ,
2762                                        0,
2763                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2764 };
2765 
2766 /* ----------------------------------------------------------------------------------------*/
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2770 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2771 {
2772   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2773   PetscErrorCode ierr;
2774 
2775   PetscFunctionBegin;
2776   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2777   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2783 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2784 {
2785   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2786   PetscErrorCode ierr;
2787 
2788   PetscFunctionBegin;
2789   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2790   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2796 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2797 {
2798   Mat_MPIAIJ     *b;
2799   PetscErrorCode ierr;
2800 
2801   PetscFunctionBegin;
2802   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2804   b = (Mat_MPIAIJ*)B->data;
2805 
2806   if (!B->preallocated) {
2807     /* Explicitly create 2 MATSEQAIJ matrices. */
2808     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2809     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2810     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2811     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2812     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2813     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2814     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2815     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2816     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2817     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2818   }
2819 
2820   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2821   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2822   B->preallocated = PETSC_TRUE;
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2828 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2829 {
2830   Mat            mat;
2831   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   *newmat = 0;
2836   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2837   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2838   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2839   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2840   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2841   a       = (Mat_MPIAIJ*)mat->data;
2842 
2843   mat->factortype   = matin->factortype;
2844   mat->assembled    = PETSC_TRUE;
2845   mat->insertmode   = NOT_SET_VALUES;
2846   mat->preallocated = PETSC_TRUE;
2847 
2848   a->size         = oldmat->size;
2849   a->rank         = oldmat->rank;
2850   a->donotstash   = oldmat->donotstash;
2851   a->roworiented  = oldmat->roworiented;
2852   a->rowindices   = 0;
2853   a->rowvalues    = 0;
2854   a->getrowactive = PETSC_FALSE;
2855 
2856   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2857   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2858 
2859   if (oldmat->colmap) {
2860 #if defined(PETSC_USE_CTABLE)
2861     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2862 #else
2863     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2864     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2865     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2866 #endif
2867   } else a->colmap = 0;
2868   if (oldmat->garray) {
2869     PetscInt len;
2870     len  = oldmat->B->cmap->n;
2871     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2872     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2873     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2874   } else a->garray = 0;
2875 
2876   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2877   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2878   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2879   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2880   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2881   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2882   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2883   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2884   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2885   *newmat = mat;
2886   PetscFunctionReturn(0);
2887 }
2888 
2889 
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatLoad_MPIAIJ"
2893 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2894 {
2895   PetscScalar    *vals,*svals;
2896   MPI_Comm       comm;
2897   PetscErrorCode ierr;
2898   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2899   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2900   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2901   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2902   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2903   int            fd;
2904   PetscInt       bs = 1;
2905 
2906   PetscFunctionBegin;
2907   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2909   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2912     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2913     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2914   }
2915 
2916   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
2917   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2918   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2919 
2920   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2921 
2922   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2923   M    = header[1]; N = header[2];
2924   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2925   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2926   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2927 
2928   /* If global sizes are set, check if they are consistent with that given in the file */
2929   if (sizesset) {
2930     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2931   }
2932   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2933   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2934 
2935   /* determine ownership of all (block) rows */
2936   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2937   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2938   else m = newMat->rmap->n; /* Set by user */
2939 
2940   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
2941   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2942 
2943   /* First process needs enough room for process with most rows */
2944   if (!rank) {
2945     mmax = rowners[1];
2946     for (i=2; i<=size; i++) {
2947       mmax = PetscMax(mmax, rowners[i]);
2948     }
2949   } else mmax = -1;             /* unused, but compilers complain */
2950 
2951   rowners[0] = 0;
2952   for (i=2; i<=size; i++) {
2953     rowners[i] += rowners[i-1];
2954   }
2955   rstart = rowners[rank];
2956   rend   = rowners[rank+1];
2957 
2958   /* distribute row lengths to all processors */
2959   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2960   if (!rank) {
2961     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2962     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2963     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2964     for (j=0; j<m; j++) {
2965       procsnz[0] += ourlens[j];
2966     }
2967     for (i=1; i<size; i++) {
2968       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2969       /* calculate the number of nonzeros on each processor */
2970       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2971         procsnz[i] += rowlengths[j];
2972       }
2973       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2974     }
2975     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2976   } else {
2977     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2978   }
2979 
2980   if (!rank) {
2981     /* determine max buffer needed and allocate it */
2982     maxnz = 0;
2983     for (i=0; i<size; i++) {
2984       maxnz = PetscMax(maxnz,procsnz[i]);
2985     }
2986     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2987 
2988     /* read in my part of the matrix column indices  */
2989     nz   = procsnz[0];
2990     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2991     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2992 
2993     /* read in every one elses and ship off */
2994     for (i=1; i<size; i++) {
2995       nz   = procsnz[i];
2996       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2997       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2998     }
2999     ierr = PetscFree(cols);CHKERRQ(ierr);
3000   } else {
3001     /* determine buffer space needed for message */
3002     nz = 0;
3003     for (i=0; i<m; i++) {
3004       nz += ourlens[i];
3005     }
3006     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3007 
3008     /* receive message of column indices*/
3009     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3010   }
3011 
3012   /* determine column ownership if matrix is not square */
3013   if (N != M) {
3014     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3015     else n = newMat->cmap->n;
3016     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3017     cstart = cend - n;
3018   } else {
3019     cstart = rstart;
3020     cend   = rend;
3021     n      = cend - cstart;
3022   }
3023 
3024   /* loop over local rows, determining number of off diagonal entries */
3025   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3026   jj   = 0;
3027   for (i=0; i<m; i++) {
3028     for (j=0; j<ourlens[i]; j++) {
3029       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3030       jj++;
3031     }
3032   }
3033 
3034   for (i=0; i<m; i++) {
3035     ourlens[i] -= offlens[i];
3036   }
3037   if (!sizesset) {
3038     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3039   }
3040 
3041   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3042 
3043   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3044 
3045   for (i=0; i<m; i++) {
3046     ourlens[i] += offlens[i];
3047   }
3048 
3049   if (!rank) {
3050     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3051 
3052     /* read in my part of the matrix numerical values  */
3053     nz   = procsnz[0];
3054     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3055 
3056     /* insert into matrix */
3057     jj      = rstart;
3058     smycols = mycols;
3059     svals   = vals;
3060     for (i=0; i<m; i++) {
3061       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3062       smycols += ourlens[i];
3063       svals   += ourlens[i];
3064       jj++;
3065     }
3066 
3067     /* read in other processors and ship out */
3068     for (i=1; i<size; i++) {
3069       nz   = procsnz[i];
3070       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3071       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3072     }
3073     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3074   } else {
3075     /* receive numeric values */
3076     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3077 
3078     /* receive message of values*/
3079     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3080 
3081     /* insert into matrix */
3082     jj      = rstart;
3083     smycols = mycols;
3084     svals   = vals;
3085     for (i=0; i<m; i++) {
3086       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3087       smycols += ourlens[i];
3088       svals   += ourlens[i];
3089       jj++;
3090     }
3091   }
3092   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3093   ierr = PetscFree(vals);CHKERRQ(ierr);
3094   ierr = PetscFree(mycols);CHKERRQ(ierr);
3095   ierr = PetscFree(rowners);CHKERRQ(ierr);
3096   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3097   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3103 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3104 {
3105   PetscErrorCode ierr;
3106   IS             iscol_local;
3107   PetscInt       csize;
3108 
3109   PetscFunctionBegin;
3110   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3111   if (call == MAT_REUSE_MATRIX) {
3112     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3113     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3114   } else {
3115     PetscInt cbs;
3116     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3117     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3118     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3119   }
3120   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3121   if (call == MAT_INITIAL_MATRIX) {
3122     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3123     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3124   }
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3131 /*
3132     Not great since it makes two copies of the submatrix, first an SeqAIJ
3133   in local and then by concatenating the local matrices the end result.
3134   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3135 
3136   Note: This requires a sequential iscol with all indices.
3137 */
3138 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3139 {
3140   PetscErrorCode ierr;
3141   PetscMPIInt    rank,size;
3142   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3143   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3144   PetscBool      allcolumns, colflag;
3145   Mat            M,Mreuse;
3146   MatScalar      *vwork,*aa;
3147   MPI_Comm       comm;
3148   Mat_SeqAIJ     *aij;
3149 
3150   PetscFunctionBegin;
3151   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3152   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3154 
3155   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3156   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3157   if (colflag && ncol == mat->cmap->N) {
3158     allcolumns = PETSC_TRUE;
3159   } else {
3160     allcolumns = PETSC_FALSE;
3161   }
3162   if (call ==  MAT_REUSE_MATRIX) {
3163     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3164     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3165     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3166   } else {
3167     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3168   }
3169 
3170   /*
3171       m - number of local rows
3172       n - number of columns (same on all processors)
3173       rstart - first row in new global matrix generated
3174   */
3175   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3176   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3177   if (call == MAT_INITIAL_MATRIX) {
3178     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3179     ii  = aij->i;
3180     jj  = aij->j;
3181 
3182     /*
3183         Determine the number of non-zeros in the diagonal and off-diagonal
3184         portions of the matrix in order to do correct preallocation
3185     */
3186 
3187     /* first get start and end of "diagonal" columns */
3188     if (csize == PETSC_DECIDE) {
3189       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3190       if (mglobal == n) { /* square matrix */
3191         nlocal = m;
3192       } else {
3193         nlocal = n/size + ((n % size) > rank);
3194       }
3195     } else {
3196       nlocal = csize;
3197     }
3198     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3199     rstart = rend - nlocal;
3200     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3201 
3202     /* next, compute all the lengths */
3203     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3204     olens = dlens + m;
3205     for (i=0; i<m; i++) {
3206       jend = ii[i+1] - ii[i];
3207       olen = 0;
3208       dlen = 0;
3209       for (j=0; j<jend; j++) {
3210         if (*jj < rstart || *jj >= rend) olen++;
3211         else dlen++;
3212         jj++;
3213       }
3214       olens[i] = olen;
3215       dlens[i] = dlen;
3216     }
3217     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3218     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3219     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3220     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3221     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3222     ierr = PetscFree(dlens);CHKERRQ(ierr);
3223   } else {
3224     PetscInt ml,nl;
3225 
3226     M    = *newmat;
3227     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3228     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3229     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3230     /*
3231          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3232        rather than the slower MatSetValues().
3233     */
3234     M->was_assembled = PETSC_TRUE;
3235     M->assembled     = PETSC_FALSE;
3236   }
3237   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3238   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3239   ii   = aij->i;
3240   jj   = aij->j;
3241   aa   = aij->a;
3242   for (i=0; i<m; i++) {
3243     row   = rstart + i;
3244     nz    = ii[i+1] - ii[i];
3245     cwork = jj;     jj += nz;
3246     vwork = aa;     aa += nz;
3247     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3248   }
3249 
3250   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3251   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3252   *newmat = M;
3253 
3254   /* save submatrix used in processor for next request */
3255   if (call ==  MAT_INITIAL_MATRIX) {
3256     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3257     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3258   }
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3264 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3265 {
3266   PetscInt       m,cstart, cend,j,nnz,i,d;
3267   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3268   const PetscInt *JJ;
3269   PetscScalar    *values;
3270   PetscErrorCode ierr;
3271 
3272   PetscFunctionBegin;
3273   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3274 
3275   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3276   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3277   m      = B->rmap->n;
3278   cstart = B->cmap->rstart;
3279   cend   = B->cmap->rend;
3280   rstart = B->rmap->rstart;
3281 
3282   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3283 
3284 #if defined(PETSC_USE_DEBUGGING)
3285   for (i=0; i<m; i++) {
3286     nnz = Ii[i+1]- Ii[i];
3287     JJ  = J + Ii[i];
3288     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3289     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3290     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3291   }
3292 #endif
3293 
3294   for (i=0; i<m; i++) {
3295     nnz     = Ii[i+1]- Ii[i];
3296     JJ      = J + Ii[i];
3297     nnz_max = PetscMax(nnz_max,nnz);
3298     d       = 0;
3299     for (j=0; j<nnz; j++) {
3300       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3301     }
3302     d_nnz[i] = d;
3303     o_nnz[i] = nnz - d;
3304   }
3305   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3306   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3307 
3308   if (v) values = (PetscScalar*)v;
3309   else {
3310     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3311   }
3312 
3313   for (i=0; i<m; i++) {
3314     ii   = i + rstart;
3315     nnz  = Ii[i+1]- Ii[i];
3316     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3317   }
3318   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3319   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3320 
3321   if (!v) {
3322     ierr = PetscFree(values);CHKERRQ(ierr);
3323   }
3324   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3330 /*@
3331    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3332    (the default parallel PETSc format).
3333 
3334    Collective on MPI_Comm
3335 
3336    Input Parameters:
3337 +  B - the matrix
3338 .  i - the indices into j for the start of each local row (starts with zero)
3339 .  j - the column indices for each local row (starts with zero)
3340 -  v - optional values in the matrix
3341 
3342    Level: developer
3343 
3344    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3348 
3349        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3350 
3351        The format which is used for the sparse matrix input, is equivalent to a
3352     row-major ordering.. i.e for the following matrix, the input data expected is
3353     as shown:
3354 
3355         1 0 0
3356         2 0 3     P0
3357        -------
3358         4 5 6     P1
3359 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]
3369 
3370 .keywords: matrix, aij, compressed row, sparse, parallel
3371 
3372 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3373           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3374 @*/
3375 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3376 {
3377   PetscErrorCode ierr;
3378 
3379   PetscFunctionBegin;
3380   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 #undef __FUNCT__
3385 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3386 /*@C
3387    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3388    (the default parallel PETSc format).  For good matrix assembly performance
3389    the user should preallocate the matrix storage by setting the parameters
3390    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3391    performance can be increased by more than a factor of 50.
3392 
3393    Collective on MPI_Comm
3394 
3395    Input Parameters:
3396 +  B - the matrix
3397 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3398            (same value is used for all local rows)
3399 .  d_nnz - array containing the number of nonzeros in the various rows of the
3400            DIAGONAL portion of the local submatrix (possibly different for each row)
3401            or NULL, if d_nz is used to specify the nonzero structure.
3402            The size of this array is equal to the number of local rows, i.e 'm'.
3403            For matrices that will be factored, you must leave room for (and set)
3404            the diagonal entry even if it is zero.
3405 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3406            submatrix (same value is used for all local rows).
3407 -  o_nnz - array containing the number of nonzeros in the various rows of the
3408            OFF-DIAGONAL portion of the local submatrix (possibly different for
3409            each row) or NULL, if o_nz is used to specify the nonzero
3410            structure. The size of this array is equal to the number
3411            of local rows, i.e 'm'.
3412 
3413    If the *_nnz parameter is given then the *_nz parameter is ignored
3414 
3415    The AIJ format (also called the Yale sparse matrix format or
3416    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3417    storage.  The stored row and column indices begin with zero.
3418    See Users-Manual: ch_mat for details.
3419 
3420    The parallel matrix is partitioned such that the first m0 rows belong to
3421    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3422    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3423 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3433 
3434    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3435 
3436    You can call MatGetInfo() to get information on how effective the preallocation was;
3437    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3438    You can also run with the option -info and look for messages with the string
3439    malloc in them to see if additional memory allocation was needed.
3440 
3441    Example usage:
3442 
3443    Consider the following 8x8 matrix with 34 non-zero values, that is
3444    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3445    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3446    as follows:
3447 
3448 .vb
3449             1  2  0  |  0  3  0  |  0  4
3450     Proc0   0  5  6  |  7  0  0  |  8  0
3451             9  0 10  | 11  0  0  | 12  0
3452     -------------------------------------
3453            13  0 14  | 15 16 17  |  0  0
3454     Proc1   0 18  0  | 19 20 21  |  0  0
3455             0  0  0  | 22 23  0  | 24  0
3456     -------------------------------------
3457     Proc2  25 26 27  |  0  0 28  | 29  0
3458            30  0  0  | 31 32 33  |  0 34
3459 .ve
3460 
3461    This can be represented as a collection of submatrices as:
3462 
3463 .vb
3464       A B C
3465       D E F
3466       G H I
3467 .ve
3468 
3469    Where the submatrices A,B,C are owned by proc0, D,E,F are
3470    owned by proc1, G,H,I are owned by proc2.
3471 
3472    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3473    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3474    The 'M','N' parameters are 8,8, and have the same values on all procs.
3475 
3476    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3477    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3478    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3482 
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
3489 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3493 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
3498 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
3502 .vb
3503      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3504      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3505      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3506 .ve
3507    Here the space allocated is sum of all the above values i.e 34, and
3508    hence pre-allocation is perfect.
3509 
3510    Level: intermediate
3511 
3512 .keywords: matrix, aij, compressed row, sparse, parallel
3513 
3514 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3515           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3516 @*/
3517 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3518 {
3519   PetscErrorCode ierr;
3520 
3521   PetscFunctionBegin;
3522   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3523   PetscValidType(B,1);
3524   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3525   PetscFunctionReturn(0);
3526 }
3527 
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3530 /*@
3531      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3532          CSR format the local rows.
3533 
3534    Collective on MPI_Comm
3535 
3536    Input Parameters:
3537 +  comm - MPI communicator
3538 .  m - number of local rows (Cannot be PETSC_DECIDE)
3539 .  n - This value should be the same as the local size used in creating the
3540        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3541        calculated if N is given) For square matrices n is almost always m.
3542 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3543 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3544 .   i - row indices
3545 .   j - column indices
3546 -   a - matrix values
3547 
3548    Output Parameter:
3549 .   mat - the matrix
3550 
3551    Level: intermediate
3552 
3553    Notes:
3554        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3555      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3556      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3557 
3558        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3559 
3560        The format which is used for the sparse matrix input, is equivalent to a
3561     row-major ordering.. i.e for the following matrix, the input data expected is
3562     as shown:
3563 
3564         1 0 0
3565         2 0 3     P0
3566        -------
3567         4 5 6     P1
3568 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        a =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        a =  {4,5,6}  [size = local nz = 3]
3578 
3579 .keywords: matrix, aij, compressed row, sparse, parallel
3580 
3581 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3582           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3583 @*/
3584 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3585 {
3586   PetscErrorCode ierr;
3587 
3588   PetscFunctionBegin;
3589   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3590   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3591   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3592   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3593   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3594   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3595   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3596   PetscFunctionReturn(0);
3597 }
3598 
3599 #undef __FUNCT__
3600 #define __FUNCT__ "MatCreateAIJ"
3601 /*@C
3602    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3603    (the default parallel PETSc format).  For good matrix assembly performance
3604    the user should preallocate the matrix storage by setting the parameters
3605    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3606    performance can be increased by more than a factor of 50.
3607 
3608    Collective on MPI_Comm
3609 
3610    Input Parameters:
3611 +  comm - MPI communicator
3612 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3613            This value should be the same as the local size used in creating the
3614            y vector for the matrix-vector product y = Ax.
3615 .  n - This value should be the same as the local size used in creating the
3616        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3617        calculated if N is given) For square matrices n is almost always m.
3618 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3619 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3620 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3621            (same value is used for all local rows)
3622 .  d_nnz - array containing the number of nonzeros in the various rows of the
3623            DIAGONAL portion of the local submatrix (possibly different for each row)
3624            or NULL, if d_nz is used to specify the nonzero structure.
3625            The size of this array is equal to the number of local rows, i.e 'm'.
3626 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3627            submatrix (same value is used for all local rows).
3628 -  o_nnz - array containing the number of nonzeros in the various rows of the
3629            OFF-DIAGONAL portion of the local submatrix (possibly different for
3630            each row) or NULL, if o_nz is used to specify the nonzero
3631            structure. The size of this array is equal to the number
3632            of local rows, i.e 'm'.
3633 
3634    Output Parameter:
3635 .  A - the matrix
3636 
3637    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3638    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3639    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3640 
3641    Notes:
3642    If the *_nnz parameter is given then the *_nz parameter is ignored
3643 
3644    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3645    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3646    storage requirements for this matrix.
3647 
3648    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3649    processor then it must be used on all processors that share the object for
3650    that argument.
3651 
3652    The user MUST specify either the local or global matrix dimensions
3653    (possibly both).
3654 
3655    The parallel matrix is partitioned across processors such that the
3656    first m0 rows belong to process 0, the next m1 rows belong to
3657    process 1, the next m2 rows belong to process 2 etc.. where
3658    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3659    values corresponding to [m x N] submatrix.
3660 
3661    The columns are logically partitioned with the n0 columns belonging
3662    to 0th partition, the next n1 columns belonging to the next
3663    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3664 
3665    The DIAGONAL portion of the local submatrix on any given processor
3666    is the submatrix corresponding to the rows and columns m,n
3667    corresponding to the given processor. i.e diagonal matrix on
3668    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3669    etc. The remaining portion of the local submatrix [m x (N-n)]
3670    constitute the OFF-DIAGONAL portion. The example below better
3671    illustrates this concept.
3672 
3673    For a square global matrix we define each processor's diagonal portion
3674    to be its local rows and the corresponding columns (a square submatrix);
3675    each processor's off-diagonal portion encompasses the remainder of the
3676    local matrix (a rectangular submatrix).
3677 
3678    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3679 
3680    When calling this routine with a single process communicator, a matrix of
3681    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3682    type of communicator, use the construction mechanism:
3683      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3684 
3685    By default, this format uses inodes (identical nodes) when possible.
3686    We search for consecutive rows with the same nonzero structure, thereby
3687    reusing matrix information to achieve increased efficiency.
3688 
3689    Options Database Keys:
3690 +  -mat_no_inode  - Do not use inodes
3691 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3692 -  -mat_aij_oneindex - Internally use indexing starting at 1
3693         rather than 0.  Note that when calling MatSetValues(),
3694         the user still MUST index entries starting at 0!
3695 
3696 
3697    Example usage:
3698 
3699    Consider the following 8x8 matrix with 34 non-zero values, that is
3700    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3701    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3702    as follows:
3703 
3704 .vb
3705             1  2  0  |  0  3  0  |  0  4
3706     Proc0   0  5  6  |  7  0  0  |  8  0
3707             9  0 10  | 11  0  0  | 12  0
3708     -------------------------------------
3709            13  0 14  | 15 16 17  |  0  0
3710     Proc1   0 18  0  | 19 20 21  |  0  0
3711             0  0  0  | 22 23  0  | 24  0
3712     -------------------------------------
3713     Proc2  25 26 27  |  0  0 28  | 29  0
3714            30  0  0  | 31 32 33  |  0 34
3715 .ve
3716 
3717    This can be represented as a collection of submatrices as:
3718 
3719 .vb
3720       A B C
3721       D E F
3722       G H I
3723 .ve
3724 
3725    Where the submatrices A,B,C are owned by proc0, D,E,F are
3726    owned by proc1, G,H,I are owned by proc2.
3727 
3728    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3729    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3730    The 'M','N' parameters are 8,8, and have the same values on all procs.
3731 
3732    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3733    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3734    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3735    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3736    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3737    matrix, and [DF] as another SeqAIJ matrix.
3738 
3739    When d_nz, o_nz parameters are specified, d_nz storage elements are
3740    allocated for every row of the local diagonal submatrix, and o_nz
3741    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3742    One way to choose d_nz and o_nz is to use the max nonzeros per local
3743    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3744    In this case, the values of d_nz,o_nz are:
3745 .vb
3746      proc0 : dnz = 2, o_nz = 2
3747      proc1 : dnz = 3, o_nz = 2
3748      proc2 : dnz = 1, o_nz = 4
3749 .ve
3750    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3751    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3752    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3753    34 values.
3754 
3755    When d_nnz, o_nnz parameters are specified, the storage is specified
3756    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3757    In the above case the values for d_nnz,o_nnz are:
3758 .vb
3759      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3760      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3761      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3762 .ve
3763    Here the space allocated is sum of all the above values i.e 34, and
3764    hence pre-allocation is perfect.
3765 
3766    Level: intermediate
3767 
3768 .keywords: matrix, aij, compressed row, sparse, parallel
3769 
3770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3771           MPIAIJ, MatCreateMPIAIJWithArrays()
3772 @*/
3773 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3774 {
3775   PetscErrorCode ierr;
3776   PetscMPIInt    size;
3777 
3778   PetscFunctionBegin;
3779   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3780   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3781   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3782   if (size > 1) {
3783     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3784     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3785   } else {
3786     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3787     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3788   }
3789   PetscFunctionReturn(0);
3790 }
3791 
3792 #undef __FUNCT__
3793 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3794 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3795 {
3796   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3797 
3798   PetscFunctionBegin;
3799   if (Ad)     *Ad     = a->A;
3800   if (Ao)     *Ao     = a->B;
3801   if (colmap) *colmap = a->garray;
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3807 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3808 {
3809   PetscErrorCode ierr;
3810   PetscInt       i;
3811   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3812 
3813   PetscFunctionBegin;
3814   if (coloring->ctype == IS_COLORING_GLOBAL) {
3815     ISColoringValue *allcolors,*colors;
3816     ISColoring      ocoloring;
3817 
3818     /* set coloring for diagonal portion */
3819     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3820 
3821     /* set coloring for off-diagonal portion */
3822     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3823     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3824     for (i=0; i<a->B->cmap->n; i++) {
3825       colors[i] = allcolors[a->garray[i]];
3826     }
3827     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3828     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3829     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3830     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3831   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3832     ISColoringValue *colors;
3833     PetscInt        *larray;
3834     ISColoring      ocoloring;
3835 
3836     /* set coloring for diagonal portion */
3837     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3838     for (i=0; i<a->A->cmap->n; i++) {
3839       larray[i] = i + A->cmap->rstart;
3840     }
3841     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3842     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3843     for (i=0; i<a->A->cmap->n; i++) {
3844       colors[i] = coloring->colors[larray[i]];
3845     }
3846     ierr = PetscFree(larray);CHKERRQ(ierr);
3847     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3848     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3849     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3850 
3851     /* set coloring for off-diagonal portion */
3852     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3853     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3854     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3855     for (i=0; i<a->B->cmap->n; i++) {
3856       colors[i] = coloring->colors[larray[i]];
3857     }
3858     ierr = PetscFree(larray);CHKERRQ(ierr);
3859     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3860     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3861     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3862   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3863   PetscFunctionReturn(0);
3864 }
3865 
3866 #undef __FUNCT__
3867 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3868 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3869 {
3870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3871   PetscErrorCode ierr;
3872 
3873   PetscFunctionBegin;
3874   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3875   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
3881 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
3882 {
3883   PetscErrorCode ierr;
3884   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
3885   PetscInt       *indx;
3886 
3887   PetscFunctionBegin;
3888   /* This routine will ONLY return MPIAIJ type matrix */
3889   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3890   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3891   if (n == PETSC_DECIDE) {
3892     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3893   }
3894   /* Check sum(n) = N */
3895   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3896   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3897 
3898   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3899   rstart -= m;
3900 
3901   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3902   for (i=0; i<m; i++) {
3903     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3904     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3905     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3906   }
3907 
3908   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3909   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3910   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3911   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3912   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3913   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
3919 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
3920 {
3921   PetscErrorCode ierr;
3922   PetscInt       m,N,i,rstart,nnz,Ii;
3923   PetscInt       *indx;
3924   PetscScalar    *values;
3925 
3926   PetscFunctionBegin;
3927   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3928   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
3929   for (i=0; i<m; i++) {
3930     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3931     Ii   = i + rstart;
3932     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3933     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3934   }
3935   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3942 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3943 {
3944   PetscErrorCode ierr;
3945   PetscMPIInt    size;
3946 
3947   PetscFunctionBegin;
3948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3949   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
3950   if (size == 1) {
3951     if (scall == MAT_INITIAL_MATRIX) {
3952       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
3953     } else {
3954       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3955     }
3956   } else {
3957     if (scall == MAT_INITIAL_MATRIX) {
3958       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
3959     }
3960     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
3961   }
3962   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 #undef __FUNCT__
3967 #define __FUNCT__ "MatFileSplit"
3968 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3969 {
3970   PetscErrorCode    ierr;
3971   PetscMPIInt       rank;
3972   PetscInt          m,N,i,rstart,nnz;
3973   size_t            len;
3974   const PetscInt    *indx;
3975   PetscViewer       out;
3976   char              *name;
3977   Mat               B;
3978   const PetscScalar *values;
3979 
3980   PetscFunctionBegin;
3981   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3982   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3983   /* Should this be the type of the diagonal block of A? */
3984   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3985   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3986   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3987   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3988   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3989   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3990   for (i=0; i<m; i++) {
3991     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3992     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3993     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3994   }
3995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3997 
3998   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3999   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4000   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4001   sprintf(name,"%s.%d",outfile,rank);
4002   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4003   ierr = PetscFree(name);CHKERRQ(ierr);
4004   ierr = MatView(B,out);CHKERRQ(ierr);
4005   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4006   ierr = MatDestroy(&B);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4011 #undef __FUNCT__
4012 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4013 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4014 {
4015   PetscErrorCode      ierr;
4016   Mat_Merge_SeqsToMPI *merge;
4017   PetscContainer      container;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4021   if (container) {
4022     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4023     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4024     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4025     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4026     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4027     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4028     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4029     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4030     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4031     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4032     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4033     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4034     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4035     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4036     ierr = PetscFree(merge);CHKERRQ(ierr);
4037     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4038   }
4039   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 #include <../src/mat/utils/freespace.h>
4044 #include <petscbt.h>
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4048 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4049 {
4050   PetscErrorCode      ierr;
4051   MPI_Comm            comm;
4052   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4053   PetscMPIInt         size,rank,taga,*len_s;
4054   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4055   PetscInt            proc,m;
4056   PetscInt            **buf_ri,**buf_rj;
4057   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4058   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4059   MPI_Request         *s_waits,*r_waits;
4060   MPI_Status          *status;
4061   MatScalar           *aa=a->a;
4062   MatScalar           **abuf_r,*ba_i;
4063   Mat_Merge_SeqsToMPI *merge;
4064   PetscContainer      container;
4065 
4066   PetscFunctionBegin;
4067   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4068   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4069 
4070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4071   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4072 
4073   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4074   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4075 
4076   bi     = merge->bi;
4077   bj     = merge->bj;
4078   buf_ri = merge->buf_ri;
4079   buf_rj = merge->buf_rj;
4080 
4081   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4082   owners = merge->rowmap->range;
4083   len_s  = merge->len_s;
4084 
4085   /* send and recv matrix values */
4086   /*-----------------------------*/
4087   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4088   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4089 
4090   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4091   for (proc=0,k=0; proc<size; proc++) {
4092     if (!len_s[proc]) continue;
4093     i    = owners[proc];
4094     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4095     k++;
4096   }
4097 
4098   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4099   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4100   ierr = PetscFree(status);CHKERRQ(ierr);
4101 
4102   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4103   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4104 
4105   /* insert mat values of mpimat */
4106   /*----------------------------*/
4107   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4108   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4109 
4110   for (k=0; k<merge->nrecv; k++) {
4111     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4112     nrows       = *(buf_ri_k[k]);
4113     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4114     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4115   }
4116 
4117   /* set values of ba */
4118   m = merge->rowmap->n;
4119   for (i=0; i<m; i++) {
4120     arow = owners[rank] + i;
4121     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4122     bnzi = bi[i+1] - bi[i];
4123     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4124 
4125     /* add local non-zero vals of this proc's seqmat into ba */
4126     anzi   = ai[arow+1] - ai[arow];
4127     aj     = a->j + ai[arow];
4128     aa     = a->a + ai[arow];
4129     nextaj = 0;
4130     for (j=0; nextaj<anzi; j++) {
4131       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4132         ba_i[j] += aa[nextaj++];
4133       }
4134     }
4135 
4136     /* add received vals into ba */
4137     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4138       /* i-th row */
4139       if (i == *nextrow[k]) {
4140         anzi   = *(nextai[k]+1) - *nextai[k];
4141         aj     = buf_rj[k] + *(nextai[k]);
4142         aa     = abuf_r[k] + *(nextai[k]);
4143         nextaj = 0;
4144         for (j=0; nextaj<anzi; j++) {
4145           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4146             ba_i[j] += aa[nextaj++];
4147           }
4148         }
4149         nextrow[k]++; nextai[k]++;
4150       }
4151     }
4152     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4153   }
4154   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156 
4157   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4158   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4159   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4160   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4161   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4166 
4167 #undef __FUNCT__
4168 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4169 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4170 {
4171   PetscErrorCode      ierr;
4172   Mat                 B_mpi;
4173   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4174   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4175   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4176   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4177   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4178   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4179   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4180   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4181   MPI_Status          *status;
4182   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4183   PetscBT             lnkbt;
4184   Mat_Merge_SeqsToMPI *merge;
4185   PetscContainer      container;
4186 
4187   PetscFunctionBegin;
4188   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4189 
4190   /* make sure it is a PETSc comm */
4191   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4193   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4194 
4195   ierr = PetscNew(&merge);CHKERRQ(ierr);
4196   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4197 
4198   /* determine row ownership */
4199   /*---------------------------------------------------------*/
4200   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4201   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4202   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4203   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4204   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4205   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4206   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4207 
4208   m      = merge->rowmap->n;
4209   owners = merge->rowmap->range;
4210 
4211   /* determine the number of messages to send, their lengths */
4212   /*---------------------------------------------------------*/
4213   len_s = merge->len_s;
4214 
4215   len          = 0; /* length of buf_si[] */
4216   merge->nsend = 0;
4217   for (proc=0; proc<size; proc++) {
4218     len_si[proc] = 0;
4219     if (proc == rank) {
4220       len_s[proc] = 0;
4221     } else {
4222       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4223       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4224     }
4225     if (len_s[proc]) {
4226       merge->nsend++;
4227       nrows = 0;
4228       for (i=owners[proc]; i<owners[proc+1]; i++) {
4229         if (ai[i+1] > ai[i]) nrows++;
4230       }
4231       len_si[proc] = 2*(nrows+1);
4232       len         += len_si[proc];
4233     }
4234   }
4235 
4236   /* determine the number and length of messages to receive for ij-structure */
4237   /*-------------------------------------------------------------------------*/
4238   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4239   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4240 
4241   /* post the Irecv of j-structure */
4242   /*-------------------------------*/
4243   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4244   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4245 
4246   /* post the Isend of j-structure */
4247   /*--------------------------------*/
4248   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4249 
4250   for (proc=0, k=0; proc<size; proc++) {
4251     if (!len_s[proc]) continue;
4252     i    = owners[proc];
4253     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4254     k++;
4255   }
4256 
4257   /* receives and sends of j-structure are complete */
4258   /*------------------------------------------------*/
4259   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4260   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4261 
4262   /* send and recv i-structure */
4263   /*---------------------------*/
4264   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4265   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4266 
4267   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4268   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4269   for (proc=0,k=0; proc<size; proc++) {
4270     if (!len_s[proc]) continue;
4271     /* form outgoing message for i-structure:
4272          buf_si[0]:                 nrows to be sent
4273                [1:nrows]:           row index (global)
4274                [nrows+1:2*nrows+1]: i-structure index
4275     */
4276     /*-------------------------------------------*/
4277     nrows       = len_si[proc]/2 - 1;
4278     buf_si_i    = buf_si + nrows+1;
4279     buf_si[0]   = nrows;
4280     buf_si_i[0] = 0;
4281     nrows       = 0;
4282     for (i=owners[proc]; i<owners[proc+1]; i++) {
4283       anzi = ai[i+1] - ai[i];
4284       if (anzi) {
4285         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4286         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4287         nrows++;
4288       }
4289     }
4290     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4291     k++;
4292     buf_si += len_si[proc];
4293   }
4294 
4295   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4296   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4297 
4298   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4299   for (i=0; i<merge->nrecv; i++) {
4300     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4301   }
4302 
4303   ierr = PetscFree(len_si);CHKERRQ(ierr);
4304   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4305   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4306   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4307   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4308   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4309   ierr = PetscFree(status);CHKERRQ(ierr);
4310 
4311   /* compute a local seq matrix in each processor */
4312   /*----------------------------------------------*/
4313   /* allocate bi array and free space for accumulating nonzero column info */
4314   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4315   bi[0] = 0;
4316 
4317   /* create and initialize a linked list */
4318   nlnk = N+1;
4319   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4320 
4321   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4322   len  = ai[owners[rank+1]] - ai[owners[rank]];
4323   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4324 
4325   current_space = free_space;
4326 
4327   /* determine symbolic info for each local row */
4328   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4329 
4330   for (k=0; k<merge->nrecv; k++) {
4331     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4332     nrows       = *buf_ri_k[k];
4333     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4334     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4335   }
4336 
4337   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4338   len  = 0;
4339   for (i=0; i<m; i++) {
4340     bnzi = 0;
4341     /* add local non-zero cols of this proc's seqmat into lnk */
4342     arow  = owners[rank] + i;
4343     anzi  = ai[arow+1] - ai[arow];
4344     aj    = a->j + ai[arow];
4345     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4346     bnzi += nlnk;
4347     /* add received col data into lnk */
4348     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4349       if (i == *nextrow[k]) { /* i-th row */
4350         anzi  = *(nextai[k]+1) - *nextai[k];
4351         aj    = buf_rj[k] + *nextai[k];
4352         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4353         bnzi += nlnk;
4354         nextrow[k]++; nextai[k]++;
4355       }
4356     }
4357     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4358 
4359     /* if free space is not available, make more free space */
4360     if (current_space->local_remaining<bnzi) {
4361       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4362       nspacedouble++;
4363     }
4364     /* copy data into free space, then initialize lnk */
4365     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4366     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4367 
4368     current_space->array           += bnzi;
4369     current_space->local_used      += bnzi;
4370     current_space->local_remaining -= bnzi;
4371 
4372     bi[i+1] = bi[i] + bnzi;
4373   }
4374 
4375   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4376 
4377   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4378   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4380 
4381   /* create symbolic parallel matrix B_mpi */
4382   /*---------------------------------------*/
4383   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4384   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4385   if (n==PETSC_DECIDE) {
4386     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4387   } else {
4388     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4389   }
4390   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4391   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4392   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4393   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4394   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4395 
4396   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4397   B_mpi->assembled    = PETSC_FALSE;
4398   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4399   merge->bi           = bi;
4400   merge->bj           = bj;
4401   merge->buf_ri       = buf_ri;
4402   merge->buf_rj       = buf_rj;
4403   merge->coi          = NULL;
4404   merge->coj          = NULL;
4405   merge->owners_co    = NULL;
4406 
4407   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4408 
4409   /* attach the supporting struct to B_mpi for reuse */
4410   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4411   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4412   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4413   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4414   *mpimat = B_mpi;
4415 
4416   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4422 /*@C
4423       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4424                  matrices from each processor
4425 
4426     Collective on MPI_Comm
4427 
4428    Input Parameters:
4429 +    comm - the communicators the parallel matrix will live on
4430 .    seqmat - the input sequential matrices
4431 .    m - number of local rows (or PETSC_DECIDE)
4432 .    n - number of local columns (or PETSC_DECIDE)
4433 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4434 
4435    Output Parameter:
4436 .    mpimat - the parallel matrix generated
4437 
4438     Level: advanced
4439 
4440    Notes:
4441      The dimensions of the sequential matrix in each processor MUST be the same.
4442      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4443      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4444 @*/
4445 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4446 {
4447   PetscErrorCode ierr;
4448   PetscMPIInt    size;
4449 
4450   PetscFunctionBegin;
4451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4452   if (size == 1) {
4453     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4454     if (scall == MAT_INITIAL_MATRIX) {
4455       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4456     } else {
4457       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4458     }
4459     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4460     PetscFunctionReturn(0);
4461   }
4462   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4463   if (scall == MAT_INITIAL_MATRIX) {
4464     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4465   }
4466   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4467   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4473 /*@
4474      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4475           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4476           with MatGetSize()
4477 
4478     Not Collective
4479 
4480    Input Parameters:
4481 +    A - the matrix
4482 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4483 
4484    Output Parameter:
4485 .    A_loc - the local sequential matrix generated
4486 
4487     Level: developer
4488 
4489 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4490 
4491 @*/
4492 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4493 {
4494   PetscErrorCode ierr;
4495   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4496   Mat_SeqAIJ     *mat,*a,*b;
4497   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4498   MatScalar      *aa,*ba,*cam;
4499   PetscScalar    *ca;
4500   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4501   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4502   PetscBool      match;
4503   MPI_Comm       comm;
4504   PetscMPIInt    size;
4505 
4506   PetscFunctionBegin;
4507   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4508   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4509   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4511   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4512 
4513   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4514   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4515   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4516   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4517   aa = a->a; ba = b->a;
4518   if (scall == MAT_INITIAL_MATRIX) {
4519     if (size == 1) {
4520       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4521       PetscFunctionReturn(0);
4522     }
4523 
4524     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4525     ci[0] = 0;
4526     for (i=0; i<am; i++) {
4527       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4528     }
4529     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4530     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4531     k    = 0;
4532     for (i=0; i<am; i++) {
4533       ncols_o = bi[i+1] - bi[i];
4534       ncols_d = ai[i+1] - ai[i];
4535       /* off-diagonal portion of A */
4536       for (jo=0; jo<ncols_o; jo++) {
4537         col = cmap[*bj];
4538         if (col >= cstart) break;
4539         cj[k]   = col; bj++;
4540         ca[k++] = *ba++;
4541       }
4542       /* diagonal portion of A */
4543       for (j=0; j<ncols_d; j++) {
4544         cj[k]   = cstart + *aj++;
4545         ca[k++] = *aa++;
4546       }
4547       /* off-diagonal portion of A */
4548       for (j=jo; j<ncols_o; j++) {
4549         cj[k]   = cmap[*bj++];
4550         ca[k++] = *ba++;
4551       }
4552     }
4553     /* put together the new matrix */
4554     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4555     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4556     /* Since these are PETSc arrays, change flags to free them as necessary. */
4557     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4558     mat->free_a  = PETSC_TRUE;
4559     mat->free_ij = PETSC_TRUE;
4560     mat->nonew   = 0;
4561   } else if (scall == MAT_REUSE_MATRIX) {
4562     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4563     ci = mat->i; cj = mat->j; cam = mat->a;
4564     for (i=0; i<am; i++) {
4565       /* off-diagonal portion of A */
4566       ncols_o = bi[i+1] - bi[i];
4567       for (jo=0; jo<ncols_o; jo++) {
4568         col = cmap[*bj];
4569         if (col >= cstart) break;
4570         *cam++ = *ba++; bj++;
4571       }
4572       /* diagonal portion of A */
4573       ncols_d = ai[i+1] - ai[i];
4574       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4575       /* off-diagonal portion of A */
4576       for (j=jo; j<ncols_o; j++) {
4577         *cam++ = *ba++; bj++;
4578       }
4579     }
4580   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4581   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4587 /*@C
4588      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4589 
4590     Not Collective
4591 
4592    Input Parameters:
4593 +    A - the matrix
4594 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4595 -    row, col - index sets of rows and columns to extract (or NULL)
4596 
4597    Output Parameter:
4598 .    A_loc - the local sequential matrix generated
4599 
4600     Level: developer
4601 
4602 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4603 
4604 @*/
4605 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4606 {
4607   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4608   PetscErrorCode ierr;
4609   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4610   IS             isrowa,iscola;
4611   Mat            *aloc;
4612   PetscBool      match;
4613 
4614   PetscFunctionBegin;
4615   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4616   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4617   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4618   if (!row) {
4619     start = A->rmap->rstart; end = A->rmap->rend;
4620     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4621   } else {
4622     isrowa = *row;
4623   }
4624   if (!col) {
4625     start = A->cmap->rstart;
4626     cmap  = a->garray;
4627     nzA   = a->A->cmap->n;
4628     nzB   = a->B->cmap->n;
4629     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4630     ncols = 0;
4631     for (i=0; i<nzB; i++) {
4632       if (cmap[i] < start) idx[ncols++] = cmap[i];
4633       else break;
4634     }
4635     imark = i;
4636     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4637     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4638     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4639   } else {
4640     iscola = *col;
4641   }
4642   if (scall != MAT_INITIAL_MATRIX) {
4643     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4644     aloc[0] = *A_loc;
4645   }
4646   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4647   *A_loc = aloc[0];
4648   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4649   if (!row) {
4650     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4651   }
4652   if (!col) {
4653     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4654   }
4655   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "MatGetBrowsOfAcols"
4661 /*@C
4662     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4663 
4664     Collective on Mat
4665 
4666    Input Parameters:
4667 +    A,B - the matrices in mpiaij format
4668 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4669 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4670 
4671    Output Parameter:
4672 +    rowb, colb - index sets of rows and columns of B to extract
4673 -    B_seq - the sequential matrix generated
4674 
4675     Level: developer
4676 
4677 @*/
4678 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4679 {
4680   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4681   PetscErrorCode ierr;
4682   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4683   IS             isrowb,iscolb;
4684   Mat            *bseq=NULL;
4685 
4686   PetscFunctionBegin;
4687   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4688     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4689   }
4690   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4691 
4692   if (scall == MAT_INITIAL_MATRIX) {
4693     start = A->cmap->rstart;
4694     cmap  = a->garray;
4695     nzA   = a->A->cmap->n;
4696     nzB   = a->B->cmap->n;
4697     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4698     ncols = 0;
4699     for (i=0; i<nzB; i++) {  /* row < local row index */
4700       if (cmap[i] < start) idx[ncols++] = cmap[i];
4701       else break;
4702     }
4703     imark = i;
4704     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4705     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4706     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4707     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4708   } else {
4709     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4710     isrowb  = *rowb; iscolb = *colb;
4711     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4712     bseq[0] = *B_seq;
4713   }
4714   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4715   *B_seq = bseq[0];
4716   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4717   if (!rowb) {
4718     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4719   } else {
4720     *rowb = isrowb;
4721   }
4722   if (!colb) {
4723     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4724   } else {
4725     *colb = iscolb;
4726   }
4727   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4728   PetscFunctionReturn(0);
4729 }
4730 
4731 #undef __FUNCT__
4732 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4733 /*
4734     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4735     of the OFF-DIAGONAL portion of local A
4736 
4737     Collective on Mat
4738 
4739    Input Parameters:
4740 +    A,B - the matrices in mpiaij format
4741 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4742 
4743    Output Parameter:
4744 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4745 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4746 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4747 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4748 
4749     Level: developer
4750 
4751 */
4752 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4753 {
4754   VecScatter_MPI_General *gen_to,*gen_from;
4755   PetscErrorCode         ierr;
4756   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4757   Mat_SeqAIJ             *b_oth;
4758   VecScatter             ctx =a->Mvctx;
4759   MPI_Comm               comm;
4760   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4761   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4762   PetscScalar            *rvalues,*svalues;
4763   MatScalar              *b_otha,*bufa,*bufA;
4764   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4765   MPI_Request            *rwaits = NULL,*swaits = NULL;
4766   MPI_Status             *sstatus,rstatus;
4767   PetscMPIInt            jj,size;
4768   PetscInt               *cols,sbs,rbs;
4769   PetscScalar            *vals;
4770 
4771   PetscFunctionBegin;
4772   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4773   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4774   if (size == 1) PetscFunctionReturn(0);
4775 
4776   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4777     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4778   }
4779   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4780   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4781 
4782   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4783   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4784   rvalues  = gen_from->values; /* holds the length of receiving row */
4785   svalues  = gen_to->values;   /* holds the length of sending row */
4786   nrecvs   = gen_from->n;
4787   nsends   = gen_to->n;
4788 
4789   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4790   srow    = gen_to->indices;    /* local row index to be sent */
4791   sstarts = gen_to->starts;
4792   sprocs  = gen_to->procs;
4793   sstatus = gen_to->sstatus;
4794   sbs     = gen_to->bs;
4795   rstarts = gen_from->starts;
4796   rprocs  = gen_from->procs;
4797   rbs     = gen_from->bs;
4798 
4799   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4800   if (scall == MAT_INITIAL_MATRIX) {
4801     /* i-array */
4802     /*---------*/
4803     /*  post receives */
4804     for (i=0; i<nrecvs; i++) {
4805       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4806       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4807       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4808     }
4809 
4810     /* pack the outgoing message */
4811     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4812 
4813     sstartsj[0] = 0;
4814     rstartsj[0] = 0;
4815     len         = 0; /* total length of j or a array to be sent */
4816     k           = 0;
4817     for (i=0; i<nsends; i++) {
4818       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4819       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4820       for (j=0; j<nrows; j++) {
4821         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4822         for (l=0; l<sbs; l++) {
4823           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4824 
4825           rowlen[j*sbs+l] = ncols;
4826 
4827           len += ncols;
4828           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4829         }
4830         k++;
4831       }
4832       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4833 
4834       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4835     }
4836     /* recvs and sends of i-array are completed */
4837     i = nrecvs;
4838     while (i--) {
4839       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4840     }
4841     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4842 
4843     /* allocate buffers for sending j and a arrays */
4844     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4845     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4846 
4847     /* create i-array of B_oth */
4848     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4849 
4850     b_othi[0] = 0;
4851     len       = 0; /* total length of j or a array to be received */
4852     k         = 0;
4853     for (i=0; i<nrecvs; i++) {
4854       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4855       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4856       for (j=0; j<nrows; j++) {
4857         b_othi[k+1] = b_othi[k] + rowlen[j];
4858         len        += rowlen[j]; k++;
4859       }
4860       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4861     }
4862 
4863     /* allocate space for j and a arrrays of B_oth */
4864     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4865     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4866 
4867     /* j-array */
4868     /*---------*/
4869     /*  post receives of j-array */
4870     for (i=0; i<nrecvs; i++) {
4871       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4872       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4873     }
4874 
4875     /* pack the outgoing message j-array */
4876     k = 0;
4877     for (i=0; i<nsends; i++) {
4878       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4879       bufJ  = bufj+sstartsj[i];
4880       for (j=0; j<nrows; j++) {
4881         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4882         for (ll=0; ll<sbs; ll++) {
4883           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4884           for (l=0; l<ncols; l++) {
4885             *bufJ++ = cols[l];
4886           }
4887           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4888         }
4889       }
4890       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4891     }
4892 
4893     /* recvs and sends of j-array are completed */
4894     i = nrecvs;
4895     while (i--) {
4896       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4897     }
4898     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4899   } else if (scall == MAT_REUSE_MATRIX) {
4900     sstartsj = *startsj_s;
4901     rstartsj = *startsj_r;
4902     bufa     = *bufa_ptr;
4903     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4904     b_otha   = b_oth->a;
4905   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4906 
4907   /* a-array */
4908   /*---------*/
4909   /*  post receives of a-array */
4910   for (i=0; i<nrecvs; i++) {
4911     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4912     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4913   }
4914 
4915   /* pack the outgoing message a-array */
4916   k = 0;
4917   for (i=0; i<nsends; i++) {
4918     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4919     bufA  = bufa+sstartsj[i];
4920     for (j=0; j<nrows; j++) {
4921       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4922       for (ll=0; ll<sbs; ll++) {
4923         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4924         for (l=0; l<ncols; l++) {
4925           *bufA++ = vals[l];
4926         }
4927         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4928       }
4929     }
4930     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4931   }
4932   /* recvs and sends of a-array are completed */
4933   i = nrecvs;
4934   while (i--) {
4935     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4936   }
4937   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4938   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4939 
4940   if (scall == MAT_INITIAL_MATRIX) {
4941     /* put together the new matrix */
4942     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4943 
4944     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4945     /* Since these are PETSc arrays, change flags to free them as necessary. */
4946     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4947     b_oth->free_a  = PETSC_TRUE;
4948     b_oth->free_ij = PETSC_TRUE;
4949     b_oth->nonew   = 0;
4950 
4951     ierr = PetscFree(bufj);CHKERRQ(ierr);
4952     if (!startsj_s || !bufa_ptr) {
4953       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4954       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4955     } else {
4956       *startsj_s = sstartsj;
4957       *startsj_r = rstartsj;
4958       *bufa_ptr  = bufa;
4959     }
4960   }
4961   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatGetCommunicationStructs"
4967 /*@C
4968   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4969 
4970   Not Collective
4971 
4972   Input Parameters:
4973 . A - The matrix in mpiaij format
4974 
4975   Output Parameter:
4976 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4977 . colmap - A map from global column index to local index into lvec
4978 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4979 
4980   Level: developer
4981 
4982 @*/
4983 #if defined(PETSC_USE_CTABLE)
4984 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4985 #else
4986 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4987 #endif
4988 {
4989   Mat_MPIAIJ *a;
4990 
4991   PetscFunctionBegin;
4992   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4993   PetscValidPointer(lvec, 2);
4994   PetscValidPointer(colmap, 3);
4995   PetscValidPointer(multScatter, 4);
4996   a = (Mat_MPIAIJ*) A->data;
4997   if (lvec) *lvec = a->lvec;
4998   if (colmap) *colmap = a->colmap;
4999   if (multScatter) *multScatter = a->Mvctx;
5000   PetscFunctionReturn(0);
5001 }
5002 
5003 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5004 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5005 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5006 #if defined(PETSC_HAVE_ELEMENTAL)
5007 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5008 #endif
5009 
5010 #undef __FUNCT__
5011 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5012 /*
5013     Computes (B'*A')' since computing B*A directly is untenable
5014 
5015                n                       p                          p
5016         (              )       (              )         (                  )
5017       m (      A       )  *  n (       B      )   =   m (         C        )
5018         (              )       (              )         (                  )
5019 
5020 */
5021 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5022 {
5023   PetscErrorCode ierr;
5024   Mat            At,Bt,Ct;
5025 
5026   PetscFunctionBegin;
5027   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5028   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5029   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5030   ierr = MatDestroy(&At);CHKERRQ(ierr);
5031   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5032   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5033   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5034   PetscFunctionReturn(0);
5035 }
5036 
5037 #undef __FUNCT__
5038 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5039 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5040 {
5041   PetscErrorCode ierr;
5042   PetscInt       m=A->rmap->n,n=B->cmap->n;
5043   Mat            Cmat;
5044 
5045   PetscFunctionBegin;
5046   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5047   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5048   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5049   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5050   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5051   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5052   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5053   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5054 
5055   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5056 
5057   *C = Cmat;
5058   PetscFunctionReturn(0);
5059 }
5060 
5061 /* ----------------------------------------------------------------*/
5062 #undef __FUNCT__
5063 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5064 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5065 {
5066   PetscErrorCode ierr;
5067 
5068   PetscFunctionBegin;
5069   if (scall == MAT_INITIAL_MATRIX) {
5070     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5071     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5072     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5073   }
5074   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5075   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5076   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5077   PetscFunctionReturn(0);
5078 }
5079 
/*
   Factorization constructors supplied by optional external packages. Each
   prototype is visible only when the matching PETSC_HAVE_* macro is defined;
   MatCreate_MPIAIJ() composes them on the matrix under the same guards.
*/
#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F);
#endif
#if defined(PETSC_HAVE_PASTIX)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat A,MatFactorType ftype,Mat *F);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F);
#endif
#if defined(PETSC_HAVE_CLIQUE)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat A,MatFactorType ftype,Mat *F);
#endif
5092 
5093 /*MC
5094    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5095 
5096    Options Database Keys:
5097 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5098 
5099   Level: beginner
5100 
5101 .seealso: MatCreateAIJ()
5102 M*/
5103 
5104 #undef __FUNCT__
5105 #define __FUNCT__ "MatCreate_MPIAIJ"
5106 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5107 {
5108   Mat_MPIAIJ     *b;
5109   PetscErrorCode ierr;
5110   PetscMPIInt    size;
5111 
5112   PetscFunctionBegin;
5113   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5114 
5115   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5116   B->data       = (void*)b;
5117   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5118   B->assembled  = PETSC_FALSE;
5119   B->insertmode = NOT_SET_VALUES;
5120   b->size       = size;
5121 
5122   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5123 
5124   /* build cache for off array entries formed */
5125   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5126 
5127   b->donotstash  = PETSC_FALSE;
5128   b->colmap      = 0;
5129   b->garray      = 0;
5130   b->roworiented = PETSC_TRUE;
5131 
5132   /* stuff used for matrix vector multiply */
5133   b->lvec  = NULL;
5134   b->Mvctx = NULL;
5135 
5136   /* stuff for MatGetRow() */
5137   b->rowindices   = 0;
5138   b->rowvalues    = 0;
5139   b->getrowactive = PETSC_FALSE;
5140 
5141   /* flexible pointer used in CUSP/CUSPARSE classes */
5142   b->spptr = NULL;
5143 
5144 #if defined(PETSC_HAVE_MUMPS)
5145   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5146 #endif
5147 #if defined(PETSC_HAVE_PASTIX)
5148   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5149 #endif
5150 #if defined(PETSC_HAVE_SUPERLU_DIST)
5151   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5152 #endif
5153 #if defined(PETSC_HAVE_CLIQUE)
5154   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5155 #endif
5156   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5157   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5158   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5159   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5160   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5161   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5162   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5163   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5164   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5165   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5166 #if defined(PETSC_HAVE_ELEMENTAL)
5167   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5168 #endif
5169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5172   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5173   PetscFunctionReturn(0);
5174 }
5175 
5176 #undef __FUNCT__
5177 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5178 /*@
5179      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5180          and "off-diagonal" part of the matrix in CSR format.
5181 
5182    Collective on MPI_Comm
5183 
5184    Input Parameters:
5185 +  comm - MPI communicator
5186 .  m - number of local rows (Cannot be PETSC_DECIDE)
5187 .  n - This value should be the same as the local size used in creating the
5188        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5189        calculated if N is given) For square matrices n is almost always m.
5190 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5191 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5192 .   i - row indices for "diagonal" portion of matrix
5193 .   j - column indices
5194 .   a - matrix values
5195 .   oi - row indices for "off-diagonal" portion of matrix
5196 .   oj - column indices
5197 -   oa - matrix values
5198 
5199    Output Parameter:
5200 .   mat - the matrix
5201 
5202    Level: advanced
5203 
5204    Notes:
5205        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5206        must free the arrays once the matrix has been destroyed and not before.
5207 
5208        The i and j indices are 0 based
5209 
5210        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5211 
5212        This sets local rows and cannot be used to set off-processor values.
5213 
5214        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5215        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5216        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5217        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5218        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5219        communication if it is known that only local entries will be set.
5220 
5221 .keywords: matrix, aij, compressed row, sparse, parallel
5222 
5223 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5224           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5225 @*/
5226 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5227 {
5228   PetscErrorCode ierr;
5229   Mat_MPIAIJ     *maij;
5230 
5231   PetscFunctionBegin;
5232   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5233   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5234   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5235   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5236   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5237   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5238   maij = (Mat_MPIAIJ*) (*mat)->data;
5239 
5240   (*mat)->preallocated = PETSC_TRUE;
5241 
5242   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5243   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5244 
5245   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5246   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5247 
5248   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5249   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5250   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5251   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5252 
5253   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5254   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5255   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5256   PetscFunctionReturn(0);
5257 }
5258 
5259 /*
5260     Special version for direct calls from Fortran
5261 */
5262 #include <petsc-private/fortranimpl.h>
5263 
5264 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5265 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5266 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5267 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5268 #endif
5269 
/* Change these macros so that they can be used in a void function */
5271 #undef CHKERRQ
5272 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5273 #undef SETERRQ2
5274 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5275 #undef SETERRQ3
5276 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5277 #undef SETERRQ
5278 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5279 
5280 #undef __FUNCT__
5281 #define __FUNCT__ "matsetvaluesmpiaij_"
5282 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5283 {
5284   Mat            mat  = *mmat;
5285   PetscInt       m    = *mm, n = *mn;
5286   InsertMode     addv = *maddv;
5287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5288   PetscScalar    value;
5289   PetscErrorCode ierr;
5290 
5291   MatCheckPreallocated(mat,1);
5292   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5293 
5294 #if defined(PETSC_USE_DEBUG)
5295   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5296 #endif
5297   {
5298     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5299     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5300     PetscBool roworiented = aij->roworiented;
5301 
5302     /* Some Variables required in the macro */
5303     Mat        A                 = aij->A;
5304     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5305     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5306     MatScalar  *aa               = a->a;
5307     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5308     Mat        B                 = aij->B;
5309     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5310     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5311     MatScalar  *ba               = b->a;
5312 
5313     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5314     PetscInt  nonew = a->nonew;
5315     MatScalar *ap1,*ap2;
5316 
5317     PetscFunctionBegin;
5318     for (i=0; i<m; i++) {
5319       if (im[i] < 0) continue;
5320 #if defined(PETSC_USE_DEBUG)
5321       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5322 #endif
5323       if (im[i] >= rstart && im[i] < rend) {
5324         row      = im[i] - rstart;
5325         lastcol1 = -1;
5326         rp1      = aj + ai[row];
5327         ap1      = aa + ai[row];
5328         rmax1    = aimax[row];
5329         nrow1    = ailen[row];
5330         low1     = 0;
5331         high1    = nrow1;
5332         lastcol2 = -1;
5333         rp2      = bj + bi[row];
5334         ap2      = ba + bi[row];
5335         rmax2    = bimax[row];
5336         nrow2    = bilen[row];
5337         low2     = 0;
5338         high2    = nrow2;
5339 
5340         for (j=0; j<n; j++) {
5341           if (roworiented) value = v[i*n+j];
5342           else value = v[i+j*m];
5343           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5344           if (in[j] >= cstart && in[j] < cend) {
5345             col = in[j] - cstart;
5346             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5347           } else if (in[j] < 0) continue;
5348 #if defined(PETSC_USE_DEBUG)
5349           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5350 #endif
5351           else {
5352             if (mat->was_assembled) {
5353               if (!aij->colmap) {
5354                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5355               }
5356 #if defined(PETSC_USE_CTABLE)
5357               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5358               col--;
5359 #else
5360               col = aij->colmap[in[j]] - 1;
5361 #endif
5362               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5363                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5364                 col  =  in[j];
5365                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5366                 B     = aij->B;
5367                 b     = (Mat_SeqAIJ*)B->data;
5368                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5369                 rp2   = bj + bi[row];
5370                 ap2   = ba + bi[row];
5371                 rmax2 = bimax[row];
5372                 nrow2 = bilen[row];
5373                 low2  = 0;
5374                 high2 = nrow2;
5375                 bm    = aij->B->rmap->n;
5376                 ba    = b->a;
5377               }
5378             } else col = in[j];
5379             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5380           }
5381         }
5382       } else if (!aij->donotstash) {
5383         if (roworiented) {
5384           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5385         } else {
5386           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5387         }
5388       }
5389     }
5390   }
5391   PetscFunctionReturnVoid();
5392 }
5393 
5394