xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0b291e46b1f87e8204dc3609b5e5317b87a2142f)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537 
2538     if (redund->psubcomm) {
2539       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2540     }
2541 
2542     ierr = redund->Destroy(A);CHKERRQ(ierr);
2543     ierr = PetscFree(redund);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2551 {
2552   PetscMPIInt    rank,size;
2553   MPI_Comm       comm,subcomm=psubcomm->comm;
2554   PetscErrorCode ierr;
2555   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2556   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2557   PetscInt       *rowrange = mat->rmap->range;
2558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2559   Mat            A = aij->A,B=aij->B,C=*matredundant;
2560   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2561   PetscScalar    *sbuf_a;
2562   PetscInt       nzlocal=a->nz+b->nz;
2563   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2564   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2565   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2566   MatScalar      *aworkA,*aworkB;
2567   PetscScalar    *vals;
2568   PetscMPIInt    tag1,tag2,tag3,imdex;
2569   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2570   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2571   MPI_Status     recv_status,*send_status;
2572   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2573   PetscInt       **rbuf_j=NULL;
2574   PetscScalar    **rbuf_a=NULL;
2575   Mat_Redundant  *redund =NULL;
2576   PetscInt       rstart_sub,rend_sub,mloc_sub;
2577 
2578   PetscFunctionBegin;
2579   if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type");
2580   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
2581   if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N);
2582 
2583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2584   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2586   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2587   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2588 
2589   if (reuse == MAT_REUSE_MATRIX) {
2590     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2591     if (subsize == 1) {
2592       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2593       redund = c->redundant;
2594     } else {
2595       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2596       redund = c->redundant;
2597     }
2598     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2599 
2600     nsends    = redund->nsends;
2601     nrecvs    = redund->nrecvs;
2602     send_rank = redund->send_rank;
2603     recv_rank = redund->recv_rank;
2604     sbuf_nz   = redund->sbuf_nz;
2605     rbuf_nz   = redund->rbuf_nz;
2606     sbuf_j    = redund->sbuf_j;
2607     sbuf_a    = redund->sbuf_a;
2608     rbuf_j    = redund->rbuf_j;
2609     rbuf_a    = redund->rbuf_a;
2610   }
2611 
2612   if (reuse == MAT_INITIAL_MATRIX) {
2613     PetscInt    nleftover,np_subcomm;
2614 
2615     /* get local size of redundant matrix */
2616     const PetscInt *range;
2617     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2618     rstart_sub = range[nsubcomm*subrank];
2619     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2620       rend_sub = range[nsubcomm*(subrank+1)];
2621     } else {
2622       rend_sub = mat->rmap->N;
2623     }
2624     mloc_sub = rend_sub - rstart_sub;
2625 
2626     /* get the destination processors' id send_rank, nsends and nrecvs */
2627     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2628 
2629     np_subcomm = size/nsubcomm;
2630     nleftover  = size - nsubcomm*np_subcomm;
2631 
2632     nsends = 0; nrecvs = 0;
2633     for (i=0; i<size; i++) {
2634       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2635         send_rank[nsends]   = i; nsends++;
2636         recv_rank[nrecvs++] = i;
2637       }
2638     }
2639     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2640       i = size-nleftover-1;
2641       j = 0;
2642       while (j < nsubcomm - nleftover) {
2643         send_rank[nsends++] = i;
2644         i--; j++;
2645       }
2646     }
2647 
2648     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2649       for (i=0; i<nleftover; i++) {
2650         recv_rank[nrecvs++] = size-nleftover+i;
2651       }
2652     }
2653 
2654     /* allocate sbuf_j, sbuf_a */
2655     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2656     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2657     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2658   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2659 
2660   /* copy mat's local entries into the buffers */
2661   if (reuse == MAT_INITIAL_MATRIX) {
2662     rownz_max = 0;
2663     rptr      = sbuf_j;
2664     cols      = sbuf_j + rend-rstart + 1;
2665     vals      = sbuf_a;
2666     rptr[0]   = 0;
2667     for (i=0; i<rend-rstart; i++) {
2668       row    = i + rstart;
2669       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2670       ncols  = nzA + nzB;
2671       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2672       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2673       /* load the column indices for this row into cols */
2674       lwrite = 0;
2675       for (l=0; l<nzB; l++) {
2676         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2677           vals[lwrite]   = aworkB[l];
2678           cols[lwrite++] = ctmp;
2679         }
2680       }
2681       for (l=0; l<nzA; l++) {
2682         vals[lwrite]   = aworkA[l];
2683         cols[lwrite++] = cstart + cworkA[l];
2684       }
2685       for (l=0; l<nzB; l++) {
2686         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2687           vals[lwrite]   = aworkB[l];
2688           cols[lwrite++] = ctmp;
2689         }
2690       }
2691       vals     += ncols;
2692       cols     += ncols;
2693       rptr[i+1] = rptr[i] + ncols;
2694       if (rownz_max < ncols) rownz_max = ncols;
2695     }
2696     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2697   } else { /* only copy matrix values into sbuf_a */
2698     rptr    = sbuf_j;
2699     vals    = sbuf_a;
2700     rptr[0] = 0;
2701     for (i=0; i<rend-rstart; i++) {
2702       row    = i + rstart;
2703       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2704       ncols  = nzA + nzB;
2705       cworkB = b->j + b->i[i];
2706       aworkA = a->a + a->i[i];
2707       aworkB = b->a + b->i[i];
2708       lwrite = 0;
2709       for (l=0; l<nzB; l++) {
2710         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2711       }
2712       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2713       for (l=0; l<nzB; l++) {
2714         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2715       }
2716       vals     += ncols;
2717       rptr[i+1] = rptr[i] + ncols;
2718     }
2719   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2720 
2721   /* send nzlocal to others, and recv other's nzlocal */
2722   /*--------------------------------------------------*/
2723   if (reuse == MAT_INITIAL_MATRIX) {
2724     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2725 
2726     s_waits2 = s_waits3 + nsends;
2727     s_waits1 = s_waits2 + nsends;
2728     r_waits1 = s_waits1 + nsends;
2729     r_waits2 = r_waits1 + nrecvs;
2730     r_waits3 = r_waits2 + nrecvs;
2731   } else {
2732     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2733 
2734     r_waits3 = s_waits3 + nsends;
2735   }
2736 
2737   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2738   if (reuse == MAT_INITIAL_MATRIX) {
2739     /* get new tags to keep the communication clean */
2740     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2741     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2742     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2743 
2744     /* post receives of other's nzlocal */
2745     for (i=0; i<nrecvs; i++) {
2746       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2747     }
2748     /* send nzlocal to others */
2749     for (i=0; i<nsends; i++) {
2750       sbuf_nz[i] = nzlocal;
2751       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2752     }
2753     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2754     count = nrecvs;
2755     while (count) {
2756       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2757 
2758       recv_rank[imdex] = recv_status.MPI_SOURCE;
2759       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2760       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2761 
2762       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2763 
2764       rbuf_nz[imdex] += i + 2;
2765 
2766       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2767       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2768       count--;
2769     }
2770     /* wait on sends of nzlocal */
2771     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2772     /* send mat->i,j to others, and recv from other's */
2773     /*------------------------------------------------*/
2774     for (i=0; i<nsends; i++) {
2775       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2776       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2777     }
2778     /* wait on receives of mat->i,j */
2779     /*------------------------------*/
2780     count = nrecvs;
2781     while (count) {
2782       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2783       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2784       count--;
2785     }
2786     /* wait on sends of mat->i,j */
2787     /*---------------------------*/
2788     if (nsends) {
2789       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2790     }
2791   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2792 
2793   /* post receives, send and receive mat->a */
2794   /*----------------------------------------*/
2795   for (imdex=0; imdex<nrecvs; imdex++) {
2796     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2797   }
2798   for (i=0; i<nsends; i++) {
2799     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2800   }
2801   count = nrecvs;
2802   while (count) {
2803     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2804     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2805     count--;
2806   }
2807   if (nsends) {
2808     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2809   }
2810 
2811   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2812 
2813   /* create redundant matrix */
2814   /*-------------------------*/
2815   if (reuse == MAT_INITIAL_MATRIX) {
2816     /* compute rownz_max for preallocation */
2817     for (imdex=0; imdex<nrecvs; imdex++) {
2818       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2819       rptr = rbuf_j[imdex];
2820       for (i=0; i<j; i++) {
2821         ncols = rptr[i+1] - rptr[i];
2822         if (rownz_max < ncols) rownz_max = ncols;
2823       }
2824     }
2825 
2826     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2827     ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2828     /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */
2829     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2830     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2831     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2832     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2833   } else {
2834     C = *matredundant;
2835   }
2836 
2837   /* insert local matrix entries */
2838   rptr = sbuf_j;
2839   cols = sbuf_j + rend-rstart + 1;
2840   vals = sbuf_a;
2841   for (i=0; i<rend-rstart; i++) {
2842     row   = i + rstart;
2843     ncols = rptr[i+1] - rptr[i];
2844     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2845     vals += ncols;
2846     cols += ncols;
2847   }
2848   /* insert received matrix entries */
2849   for (imdex=0; imdex<nrecvs; imdex++) {
2850     rstart = rowrange[recv_rank[imdex]];
2851     rend   = rowrange[recv_rank[imdex]+1];
2852     rptr   = rbuf_j[imdex];
2853     cols   = rbuf_j[imdex] + rend-rstart + 1;
2854     vals   = rbuf_a[imdex];
2855     for (i=0; i<rend-rstart; i++) {
2856       row   = i + rstart;
2857       ncols = rptr[i+1] - rptr[i];
2858       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2859       vals += ncols;
2860       cols += ncols;
2861     }
2862   }
2863   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2864   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2865 
2866   if (reuse == MAT_INITIAL_MATRIX) {
2867     *matredundant = C;
2868 
2869     /* create a supporting struct and attach it to C for reuse */
2870     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2871     if (subsize == 1) {
2872       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2873       c->redundant = redund;
2874     } else {
2875       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2876       c->redundant = redund;
2877     }
2878 
2879     redund->nzlocal   = nzlocal;
2880     redund->nsends    = nsends;
2881     redund->nrecvs    = nrecvs;
2882     redund->send_rank = send_rank;
2883     redund->recv_rank = recv_rank;
2884     redund->sbuf_nz   = sbuf_nz;
2885     redund->rbuf_nz   = rbuf_nz;
2886     redund->sbuf_j    = sbuf_j;
2887     redund->sbuf_a    = sbuf_a;
2888     redund->rbuf_j    = rbuf_j;
2889     redund->rbuf_a    = rbuf_a;
2890     redund->psubcomm  = NULL;
2891 
2892     redund->Destroy = C->ops->destroy;
2893     C->ops->destroy = MatDestroy_MatRedundant;
2894   }
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 #undef __FUNCT__
2899 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2900 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2901 {
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905   /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */
2906   if (!subcomm || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
2907     MPI_Comm       comm;
2908     PetscSubcomm   psubcomm;
2909     PetscMPIInt    size,subsize;
2910     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2911     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2912     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2913     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2914     ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2915     ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2916 
2917     /* free psubcomm in MatDestroy_MatRedundant() */
2918     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2919     Mat C = *matredundant;
2920     if (subsize == 1) {
2921       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2922       c->redundant->psubcomm = psubcomm;
2923     } else {
2924       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2925       c->redundant->psubcomm = psubcomm ;
2926     }
2927   } else {
2928     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2929   }
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2935 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2936 {
2937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,*idxb = 0;
2940   PetscScalar    *va,*vb;
2941   Vec            vtmp;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2946   if (idx) {
2947     for (i=0; i<A->rmap->n; i++) {
2948       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2949     }
2950   }
2951 
2952   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2953   if (idx) {
2954     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2955   }
2956   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2957   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2958 
2959   for (i=0; i<A->rmap->n; i++) {
2960     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2961       va[i] = vb[i];
2962       if (idx) idx[i] = a->garray[idxb[i]];
2963     }
2964   }
2965 
2966   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2968   ierr = PetscFree(idxb);CHKERRQ(ierr);
2969   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2975 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2978   PetscErrorCode ierr;
2979   PetscInt       i,*idxb = 0;
2980   PetscScalar    *va,*vb;
2981   Vec            vtmp;
2982 
2983   PetscFunctionBegin;
2984   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2985   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2986   if (idx) {
2987     for (i=0; i<A->cmap->n; i++) {
2988       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2989     }
2990   }
2991 
2992   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2993   if (idx) {
2994     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2995   }
2996   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2997   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2998 
2999   for (i=0; i<A->rmap->n; i++) {
3000     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3001       va[i] = vb[i];
3002       if (idx) idx[i] = a->garray[idxb[i]];
3003     }
3004   }
3005 
3006   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3008   ierr = PetscFree(idxb);CHKERRQ(ierr);
3009   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3015 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3016 {
3017   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3018   PetscInt       n      = A->rmap->n;
3019   PetscInt       cstart = A->cmap->rstart;
3020   PetscInt       *cmap  = mat->garray;
3021   PetscInt       *diagIdx, *offdiagIdx;
3022   Vec            diagV, offdiagV;
3023   PetscScalar    *a, *diagA, *offdiagA;
3024   PetscInt       r;
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3029   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3031   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3032   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3033   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3034   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3035   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3036   for (r = 0; r < n; ++r) {
3037     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3038       a[r]   = diagA[r];
3039       idx[r] = cstart + diagIdx[r];
3040     } else {
3041       a[r]   = offdiagA[r];
3042       idx[r] = cmap[offdiagIdx[r]];
3043     }
3044   }
3045   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3046   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3048   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3049   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3050   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3056 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3057 {
3058   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3059   PetscInt       n      = A->rmap->n;
3060   PetscInt       cstart = A->cmap->rstart;
3061   PetscInt       *cmap  = mat->garray;
3062   PetscInt       *diagIdx, *offdiagIdx;
3063   Vec            diagV, offdiagV;
3064   PetscScalar    *a, *diagA, *offdiagA;
3065   PetscInt       r;
3066   PetscErrorCode ierr;
3067 
3068   PetscFunctionBegin;
3069   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3070   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3071   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3072   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3073   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3074   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3075   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3076   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3077   for (r = 0; r < n; ++r) {
3078     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3079       a[r]   = diagA[r];
3080       idx[r] = cstart + diagIdx[r];
3081     } else {
3082       a[r]   = offdiagA[r];
3083       idx[r] = cmap[offdiagIdx[r]];
3084     }
3085   }
3086   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3087   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3088   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3089   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3090   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3091   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3097 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3098 {
3099   PetscErrorCode ierr;
3100   Mat            *dummy;
3101 
3102   PetscFunctionBegin;
3103   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3104   *newmat = *dummy;
3105   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3106   PetscFunctionReturn(0);
3107 }
3108 
3109 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3110 
3111 #undef __FUNCT__
3112 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3113 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3114 {
3115   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3116   PetscErrorCode ierr;
3117 
3118   PetscFunctionBegin;
3119   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3120   PetscFunctionReturn(0);
3121 }
3122 
3123 #undef __FUNCT__
3124 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3125 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3126 {
3127   PetscErrorCode ierr;
3128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3129 
3130   PetscFunctionBegin;
3131   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3132   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3133   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 
3138 /* -------------------------------------------------------------------*/
3139 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3140                                        MatGetRow_MPIAIJ,
3141                                        MatRestoreRow_MPIAIJ,
3142                                        MatMult_MPIAIJ,
3143                                 /* 4*/ MatMultAdd_MPIAIJ,
3144                                        MatMultTranspose_MPIAIJ,
3145                                        MatMultTransposeAdd_MPIAIJ,
3146 #if defined(PETSC_HAVE_PBGL)
3147                                        MatSolve_MPIAIJ,
3148 #else
3149                                        0,
3150 #endif
3151                                        0,
3152                                        0,
3153                                 /*10*/ 0,
3154                                        0,
3155                                        0,
3156                                        MatSOR_MPIAIJ,
3157                                        MatTranspose_MPIAIJ,
3158                                 /*15*/ MatGetInfo_MPIAIJ,
3159                                        MatEqual_MPIAIJ,
3160                                        MatGetDiagonal_MPIAIJ,
3161                                        MatDiagonalScale_MPIAIJ,
3162                                        MatNorm_MPIAIJ,
3163                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3164                                        MatAssemblyEnd_MPIAIJ,
3165                                        MatSetOption_MPIAIJ,
3166                                        MatZeroEntries_MPIAIJ,
3167                                 /*24*/ MatZeroRows_MPIAIJ,
3168                                        0,
3169 #if defined(PETSC_HAVE_PBGL)
3170                                        0,
3171 #else
3172                                        0,
3173 #endif
3174                                        0,
3175                                        0,
3176                                 /*29*/ MatSetUp_MPIAIJ,
3177 #if defined(PETSC_HAVE_PBGL)
3178                                        0,
3179 #else
3180                                        0,
3181 #endif
3182                                        0,
3183                                        0,
3184                                        0,
3185                                 /*34*/ MatDuplicate_MPIAIJ,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                 /*39*/ MatAXPY_MPIAIJ,
3191                                        MatGetSubMatrices_MPIAIJ,
3192                                        MatIncreaseOverlap_MPIAIJ,
3193                                        MatGetValues_MPIAIJ,
3194                                        MatCopy_MPIAIJ,
3195                                 /*44*/ MatGetRowMax_MPIAIJ,
3196                                        MatScale_MPIAIJ,
3197                                        0,
3198                                        0,
3199                                        MatZeroRowsColumns_MPIAIJ,
3200                                 /*49*/ MatSetRandom_MPIAIJ,
3201                                        0,
3202                                        0,
3203                                        0,
3204                                        0,
3205                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3206                                        0,
3207                                        MatSetUnfactored_MPIAIJ,
3208                                        MatPermute_MPIAIJ,
3209                                        0,
3210                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3211                                        MatDestroy_MPIAIJ,
3212                                        MatView_MPIAIJ,
3213                                        0,
3214                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3215                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3216                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3217                                        0,
3218                                        0,
3219                                        0,
3220                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3221                                        MatGetRowMinAbs_MPIAIJ,
3222                                        0,
3223                                        MatSetColoring_MPIAIJ,
3224                                        0,
3225                                        MatSetValuesAdifor_MPIAIJ,
3226                                 /*75*/ MatFDColoringApply_AIJ,
3227                                        0,
3228                                        0,
3229                                        0,
3230                                        MatFindZeroDiagonals_MPIAIJ,
3231                                 /*80*/ 0,
3232                                        0,
3233                                        0,
3234                                 /*83*/ MatLoad_MPIAIJ,
3235                                        0,
3236                                        0,
3237                                        0,
3238                                        0,
3239                                        0,
3240                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3241                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3242                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3243                                        MatPtAP_MPIAIJ_MPIAIJ,
3244                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3245                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3246                                        0,
3247                                        0,
3248                                        0,
3249                                        0,
3250                                 /*99*/ 0,
3251                                        0,
3252                                        0,
3253                                        MatConjugate_MPIAIJ,
3254                                        0,
3255                                 /*104*/MatSetValuesRow_MPIAIJ,
3256                                        MatRealPart_MPIAIJ,
3257                                        MatImaginaryPart_MPIAIJ,
3258                                        0,
3259                                        0,
3260                                 /*109*/0,
3261                                        MatGetRedundantMatrix_MPIAIJ,
3262                                        MatGetRowMin_MPIAIJ,
3263                                        0,
3264                                        0,
3265                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3266                                        0,
3267                                        0,
3268                                        0,
3269                                        0,
3270                                 /*119*/0,
3271                                        0,
3272                                        0,
3273                                        0,
3274                                        MatGetMultiProcBlock_MPIAIJ,
3275                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3276                                        MatGetColumnNorms_MPIAIJ,
3277                                        MatInvertBlockDiagonal_MPIAIJ,
3278                                        0,
3279                                        MatGetSubMatricesParallel_MPIAIJ,
3280                                 /*129*/0,
3281                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3282                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3283                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3284                                        0,
3285                                 /*134*/0,
3286                                        0,
3287                                        0,
3288                                        0,
3289                                        0,
3290                                 /*139*/0,
3291                                        0
3292 };
3293 
3294 /* ----------------------------------------------------------------------------------------*/
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3298 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3299 {
3300   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3301   PetscErrorCode ierr;
3302 
3303   PetscFunctionBegin;
3304   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3305   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3311 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3312 {
3313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3314   PetscErrorCode ierr;
3315 
3316   PetscFunctionBegin;
3317   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3318   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3319   PetscFunctionReturn(0);
3320 }
3321 
3322 #undef __FUNCT__
3323 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3324 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3325 {
3326   Mat_MPIAIJ     *b;
3327   PetscErrorCode ierr;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3331   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3332   b = (Mat_MPIAIJ*)B->data;
3333 
3334   if (!B->preallocated) {
3335     /* Explicitly create 2 MATSEQAIJ matrices. */
3336     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3337     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3338     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3339     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3340     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3341     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3342     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3343     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3344     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3345     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3346   }
3347 
3348   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3349   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3350   B->preallocated = PETSC_TRUE;
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3356 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3357 {
3358   Mat            mat;
3359   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3360   PetscErrorCode ierr;
3361 
3362   PetscFunctionBegin;
3363   *newmat = 0;
3364   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3365   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3366   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3367   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3368   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3369   a       = (Mat_MPIAIJ*)mat->data;
3370 
3371   mat->factortype   = matin->factortype;
3372   mat->rmap->bs     = matin->rmap->bs;
3373   mat->cmap->bs     = matin->cmap->bs;
3374   mat->assembled    = PETSC_TRUE;
3375   mat->insertmode   = NOT_SET_VALUES;
3376   mat->preallocated = PETSC_TRUE;
3377 
3378   a->size         = oldmat->size;
3379   a->rank         = oldmat->rank;
3380   a->donotstash   = oldmat->donotstash;
3381   a->roworiented  = oldmat->roworiented;
3382   a->rowindices   = 0;
3383   a->rowvalues    = 0;
3384   a->getrowactive = PETSC_FALSE;
3385 
3386   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3387   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3388 
3389   if (oldmat->colmap) {
3390 #if defined(PETSC_USE_CTABLE)
3391     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3392 #else
3393     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3394     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3395     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3396 #endif
3397   } else a->colmap = 0;
3398   if (oldmat->garray) {
3399     PetscInt len;
3400     len  = oldmat->B->cmap->n;
3401     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3402     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3403     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3404   } else a->garray = 0;
3405 
3406   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3407   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3408   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3409   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3410   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3411   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3412   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3413   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3414   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3415   *newmat = mat;
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatLoad_MPIAIJ"
3423 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3424 {
3425   PetscScalar    *vals,*svals;
3426   MPI_Comm       comm;
3427   PetscErrorCode ierr;
3428   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3429   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3430   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3431   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3432   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3433   int            fd;
3434   PetscInt       bs = 1;
3435 
3436   PetscFunctionBegin;
3437   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3438   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3439   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3440   if (!rank) {
3441     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3442     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3443     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3444   }
3445 
3446   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3447   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3448   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3449 
3450   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3451 
3452   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3453   M    = header[1]; N = header[2];
3454   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3455   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3456   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3457 
3458   /* If global sizes are set, check if they are consistent with that given in the file */
3459   if (sizesset) {
3460     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3461   }
3462   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3463   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3464 
3465   /* determine ownership of all (block) rows */
3466   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3467   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3468   else m = newMat->rmap->n; /* Set by user */
3469 
3470   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3471   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3472 
3473   /* First process needs enough room for process with most rows */
3474   if (!rank) {
3475     mmax = rowners[1];
3476     for (i=2; i<=size; i++) {
3477       mmax = PetscMax(mmax, rowners[i]);
3478     }
3479   } else mmax = -1;             /* unused, but compilers complain */
3480 
3481   rowners[0] = 0;
3482   for (i=2; i<=size; i++) {
3483     rowners[i] += rowners[i-1];
3484   }
3485   rstart = rowners[rank];
3486   rend   = rowners[rank+1];
3487 
3488   /* distribute row lengths to all processors */
3489   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3490   if (!rank) {
3491     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3492     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3493     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3494     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3495     for (j=0; j<m; j++) {
3496       procsnz[0] += ourlens[j];
3497     }
3498     for (i=1; i<size; i++) {
3499       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3500       /* calculate the number of nonzeros on each processor */
3501       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3502         procsnz[i] += rowlengths[j];
3503       }
3504       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3505     }
3506     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3507   } else {
3508     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3509   }
3510 
3511   if (!rank) {
3512     /* determine max buffer needed and allocate it */
3513     maxnz = 0;
3514     for (i=0; i<size; i++) {
3515       maxnz = PetscMax(maxnz,procsnz[i]);
3516     }
3517     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3518 
3519     /* read in my part of the matrix column indices  */
3520     nz   = procsnz[0];
3521     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3522     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3523 
3524     /* read in every one elses and ship off */
3525     for (i=1; i<size; i++) {
3526       nz   = procsnz[i];
3527       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3528       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3529     }
3530     ierr = PetscFree(cols);CHKERRQ(ierr);
3531   } else {
3532     /* determine buffer space needed for message */
3533     nz = 0;
3534     for (i=0; i<m; i++) {
3535       nz += ourlens[i];
3536     }
3537     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3538 
3539     /* receive message of column indices*/
3540     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3541   }
3542 
3543   /* determine column ownership if matrix is not square */
3544   if (N != M) {
3545     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3546     else n = newMat->cmap->n;
3547     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3548     cstart = cend - n;
3549   } else {
3550     cstart = rstart;
3551     cend   = rend;
3552     n      = cend - cstart;
3553   }
3554 
3555   /* loop over local rows, determining number of off diagonal entries */
3556   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3557   jj   = 0;
3558   for (i=0; i<m; i++) {
3559     for (j=0; j<ourlens[i]; j++) {
3560       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3561       jj++;
3562     }
3563   }
3564 
3565   for (i=0; i<m; i++) {
3566     ourlens[i] -= offlens[i];
3567   }
3568   if (!sizesset) {
3569     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3570   }
3571 
3572   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3573 
3574   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3575 
3576   for (i=0; i<m; i++) {
3577     ourlens[i] += offlens[i];
3578   }
3579 
3580   if (!rank) {
3581     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3582 
3583     /* read in my part of the matrix numerical values  */
3584     nz   = procsnz[0];
3585     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3586 
3587     /* insert into matrix */
3588     jj      = rstart;
3589     smycols = mycols;
3590     svals   = vals;
3591     for (i=0; i<m; i++) {
3592       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3593       smycols += ourlens[i];
3594       svals   += ourlens[i];
3595       jj++;
3596     }
3597 
3598     /* read in other processors and ship out */
3599     for (i=1; i<size; i++) {
3600       nz   = procsnz[i];
3601       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3602       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3603     }
3604     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3605   } else {
3606     /* receive numeric values */
3607     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3608 
3609     /* receive message of values*/
3610     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3611 
3612     /* insert into matrix */
3613     jj      = rstart;
3614     smycols = mycols;
3615     svals   = vals;
3616     for (i=0; i<m; i++) {
3617       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3618       smycols += ourlens[i];
3619       svals   += ourlens[i];
3620       jj++;
3621     }
3622   }
3623   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3624   ierr = PetscFree(vals);CHKERRQ(ierr);
3625   ierr = PetscFree(mycols);CHKERRQ(ierr);
3626   ierr = PetscFree(rowners);CHKERRQ(ierr);
3627   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3628   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3634 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3635 {
3636   PetscErrorCode ierr;
3637   IS             iscol_local;
3638   PetscInt       csize;
3639 
3640   PetscFunctionBegin;
3641   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3642   if (call == MAT_REUSE_MATRIX) {
3643     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3644     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3645   } else {
3646     PetscInt cbs;
3647     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3648     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3649     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3650   }
3651   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3652   if (call == MAT_INITIAL_MATRIX) {
3653     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3654     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3655   }
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3660 #undef __FUNCT__
3661 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3662 /*
3663     Not great since it makes two copies of the submatrix, first an SeqAIJ
3664   in local and then by concatenating the local matrices the end result.
3665   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3666 
3667   Note: This requires a sequential iscol with all indices.
3668 */
3669 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3670 {
3671   PetscErrorCode ierr;
3672   PetscMPIInt    rank,size;
3673   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3674   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3675   PetscBool      allcolumns, colflag;
3676   Mat            M,Mreuse;
3677   MatScalar      *vwork,*aa;
3678   MPI_Comm       comm;
3679   Mat_SeqAIJ     *aij;
3680 
3681   PetscFunctionBegin;
3682   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3683   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3684   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3685 
3686   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3687   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3688   if (colflag && ncol == mat->cmap->N) {
3689     allcolumns = PETSC_TRUE;
3690   } else {
3691     allcolumns = PETSC_FALSE;
3692   }
3693   if (call ==  MAT_REUSE_MATRIX) {
3694     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3695     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3696     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3697   } else {
3698     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3699   }
3700 
3701   /*
3702       m - number of local rows
3703       n - number of columns (same on all processors)
3704       rstart - first row in new global matrix generated
3705   */
3706   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3707   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3708   if (call == MAT_INITIAL_MATRIX) {
3709     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3710     ii  = aij->i;
3711     jj  = aij->j;
3712 
3713     /*
3714         Determine the number of non-zeros in the diagonal and off-diagonal
3715         portions of the matrix in order to do correct preallocation
3716     */
3717 
3718     /* first get start and end of "diagonal" columns */
3719     if (csize == PETSC_DECIDE) {
3720       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3721       if (mglobal == n) { /* square matrix */
3722         nlocal = m;
3723       } else {
3724         nlocal = n/size + ((n % size) > rank);
3725       }
3726     } else {
3727       nlocal = csize;
3728     }
3729     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3730     rstart = rend - nlocal;
3731     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3732 
3733     /* next, compute all the lengths */
3734     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3735     olens = dlens + m;
3736     for (i=0; i<m; i++) {
3737       jend = ii[i+1] - ii[i];
3738       olen = 0;
3739       dlen = 0;
3740       for (j=0; j<jend; j++) {
3741         if (*jj < rstart || *jj >= rend) olen++;
3742         else dlen++;
3743         jj++;
3744       }
3745       olens[i] = olen;
3746       dlens[i] = dlen;
3747     }
3748     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3749     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3750     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3751     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3753     ierr = PetscFree(dlens);CHKERRQ(ierr);
3754   } else {
3755     PetscInt ml,nl;
3756 
3757     M    = *newmat;
3758     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3759     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3760     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3761     /*
3762          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3763        rather than the slower MatSetValues().
3764     */
3765     M->was_assembled = PETSC_TRUE;
3766     M->assembled     = PETSC_FALSE;
3767   }
3768   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3769   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3770   ii   = aij->i;
3771   jj   = aij->j;
3772   aa   = aij->a;
3773   for (i=0; i<m; i++) {
3774     row   = rstart + i;
3775     nz    = ii[i+1] - ii[i];
3776     cwork = jj;     jj += nz;
3777     vwork = aa;     aa += nz;
3778     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3779   }
3780 
3781   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3783   *newmat = M;
3784 
3785   /* save submatrix used in processor for next request */
3786   if (call ==  MAT_INITIAL_MATRIX) {
3787     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3788     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3789   }
3790   PetscFunctionReturn(0);
3791 }
3792 
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3795 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3796 {
3797   PetscInt       m,cstart, cend,j,nnz,i,d;
3798   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3799   const PetscInt *JJ;
3800   PetscScalar    *values;
3801   PetscErrorCode ierr;
3802 
3803   PetscFunctionBegin;
3804   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3805 
3806   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3807   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3808   m      = B->rmap->n;
3809   cstart = B->cmap->rstart;
3810   cend   = B->cmap->rend;
3811   rstart = B->rmap->rstart;
3812 
3813   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3814 
3815 #if defined(PETSC_USE_DEBUGGING)
3816   for (i=0; i<m; i++) {
3817     nnz = Ii[i+1]- Ii[i];
3818     JJ  = J + Ii[i];
3819     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3820     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3821     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3822   }
3823 #endif
3824 
3825   for (i=0; i<m; i++) {
3826     nnz     = Ii[i+1]- Ii[i];
3827     JJ      = J + Ii[i];
3828     nnz_max = PetscMax(nnz_max,nnz);
3829     d       = 0;
3830     for (j=0; j<nnz; j++) {
3831       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3832     }
3833     d_nnz[i] = d;
3834     o_nnz[i] = nnz - d;
3835   }
3836   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3837   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3838 
3839   if (v) values = (PetscScalar*)v;
3840   else {
3841     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3842     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3843   }
3844 
3845   for (i=0; i<m; i++) {
3846     ii   = i + rstart;
3847     nnz  = Ii[i+1]- Ii[i];
3848     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3849   }
3850   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3851   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3852 
3853   if (!v) {
3854     ierr = PetscFree(values);CHKERRQ(ierr);
3855   }
3856   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
3859 
3860 #undef __FUNCT__
3861 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3862 /*@
3863    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3864    (the default parallel PETSc format).
3865 
3866    Collective on MPI_Comm
3867 
3868    Input Parameters:
3869 +  B - the matrix
3870 .  i - the indices into j for the start of each local row (starts with zero)
3871 .  j - the column indices for each local row (starts with zero)
3872 -  v - optional values in the matrix
3873 
3874    Level: developer
3875 
3876    Notes:
3877        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3878      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3879      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3880 
3881        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3882 
3883        The format which is used for the sparse matrix input, is equivalent to a
3884     row-major ordering.. i.e for the following matrix, the input data expected is
3885     as shown:
3886 
3887         1 0 0
3888         2 0 3     P0
3889        -------
3890         4 5 6     P1
3891 
3892      Process0 [P0]: rows_owned=[0,1]
3893         i =  {0,1,3}  [size = nrow+1  = 2+1]
3894         j =  {0,0,2}  [size = nz = 6]
3895         v =  {1,2,3}  [size = nz = 6]
3896 
3897      Process1 [P1]: rows_owned=[2]
3898         i =  {0,3}    [size = nrow+1  = 1+1]
3899         j =  {0,1,2}  [size = nz = 6]
3900         v =  {4,5,6}  [size = nz = 6]
3901 
3902 .keywords: matrix, aij, compressed row, sparse, parallel
3903 
3904 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3905           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3906 @*/
3907 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3908 {
3909   PetscErrorCode ierr;
3910 
3911   PetscFunctionBegin;
3912   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3913   PetscFunctionReturn(0);
3914 }
3915 
3916 #undef __FUNCT__
3917 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3918 /*@C
3919    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3920    (the default parallel PETSc format).  For good matrix assembly performance
3921    the user should preallocate the matrix storage by setting the parameters
3922    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3923    performance can be increased by more than a factor of 50.
3924 
3925    Collective on MPI_Comm
3926 
3927    Input Parameters:
3928 +  A - the matrix
3929 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3930            (same value is used for all local rows)
3931 .  d_nnz - array containing the number of nonzeros in the various rows of the
3932            DIAGONAL portion of the local submatrix (possibly different for each row)
3933            or NULL, if d_nz is used to specify the nonzero structure.
3934            The size of this array is equal to the number of local rows, i.e 'm'.
3935            For matrices that will be factored, you must leave room for (and set)
3936            the diagonal entry even if it is zero.
3937 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3938            submatrix (same value is used for all local rows).
3939 -  o_nnz - array containing the number of nonzeros in the various rows of the
3940            OFF-DIAGONAL portion of the local submatrix (possibly different for
3941            each row) or NULL, if o_nz is used to specify the nonzero
3942            structure. The size of this array is equal to the number
3943            of local rows, i.e 'm'.
3944 
3945    If the *_nnz parameter is given then the *_nz parameter is ignored
3946 
3947    The AIJ format (also called the Yale sparse matrix format or
3948    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3949    storage.  The stored row and column indices begin with zero.
3950    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3951 
3952    The parallel matrix is partitioned such that the first m0 rows belong to
3953    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3954    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3955 
3956    The DIAGONAL portion of the local submatrix of a processor can be defined
3957    as the submatrix which is obtained by extraction the part corresponding to
3958    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3959    first row that belongs to the processor, r2 is the last row belonging to
3960    the this processor, and c1-c2 is range of indices of the local part of a
3961    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3962    common case of a square matrix, the row and column ranges are the same and
3963    the DIAGONAL part is also square. The remaining portion of the local
3964    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3965 
3966    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3967 
3968    You can call MatGetInfo() to get information on how effective the preallocation was;
3969    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3970    You can also run with the option -info and look for messages with the string
3971    malloc in them to see if additional memory allocation was needed.
3972 
3973    Example usage:
3974 
3975    Consider the following 8x8 matrix with 34 non-zero values, that is
3976    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3977    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3978    as follows:
3979 
3980 .vb
3981             1  2  0  |  0  3  0  |  0  4
3982     Proc0   0  5  6  |  7  0  0  |  8  0
3983             9  0 10  | 11  0  0  | 12  0
3984     -------------------------------------
3985            13  0 14  | 15 16 17  |  0  0
3986     Proc1   0 18  0  | 19 20 21  |  0  0
3987             0  0  0  | 22 23  0  | 24  0
3988     -------------------------------------
3989     Proc2  25 26 27  |  0  0 28  | 29  0
3990            30  0  0  | 31 32 33  |  0 34
3991 .ve
3992 
3993    This can be represented as a collection of submatrices as:
3994 
3995 .vb
3996       A B C
3997       D E F
3998       G H I
3999 .ve
4000 
4001    Where the submatrices A,B,C are owned by proc0, D,E,F are
4002    owned by proc1, G,H,I are owned by proc2.
4003 
4004    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4005    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4006    The 'M','N' parameters are 8,8, and have the same values on all procs.
4007 
4008    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4009    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4010    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4011    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4012    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4013    matrix, ans [DF] as another SeqAIJ matrix.
4014 
4015    When d_nz, o_nz parameters are specified, d_nz storage elements are
4016    allocated for every row of the local diagonal submatrix, and o_nz
4017    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4018    One way to choose d_nz and o_nz is to use the max nonzerors per local
4019    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4020    In this case, the values of d_nz,o_nz are:
4021 .vb
4022      proc0 : dnz = 2, o_nz = 2
4023      proc1 : dnz = 3, o_nz = 2
4024      proc2 : dnz = 1, o_nz = 4
4025 .ve
4026    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4027    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4028    for proc3. i.e we are using 12+15+10=37 storage locations to store
4029    34 values.
4030 
4031    When d_nnz, o_nnz parameters are specified, the storage is specified
4032    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4033    In the above case the values for d_nnz,o_nnz are:
4034 .vb
4035      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4036      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4037      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4038 .ve
4039    Here the space allocated is sum of all the above values i.e 34, and
4040    hence pre-allocation is perfect.
4041 
4042    Level: intermediate
4043 
4044 .keywords: matrix, aij, compressed row, sparse, parallel
4045 
4046 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4047           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4048 @*/
4049 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4050 {
4051   PetscErrorCode ierr;
4052 
4053   PetscFunctionBegin;
4054   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4055   PetscValidType(B,1);
4056   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4057   PetscFunctionReturn(0);
4058 }
4059 
4060 #undef __FUNCT__
4061 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4062 /*@
4063      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4064          CSR format the local rows.
4065 
4066    Collective on MPI_Comm
4067 
4068    Input Parameters:
4069 +  comm - MPI communicator
4070 .  m - number of local rows (Cannot be PETSC_DECIDE)
4071 .  n - This value should be the same as the local size used in creating the
4072        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4073        calculated if N is given) For square matrices n is almost always m.
4074 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4075 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4076 .   i - row indices
4077 .   j - column indices
4078 -   a - matrix values
4079 
4080    Output Parameter:
4081 .   mat - the matrix
4082 
4083    Level: intermediate
4084 
4085    Notes:
4086        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4087      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4088      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4089 
4090        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4091 
4092        The format which is used for the sparse matrix input, is equivalent to a
4093     row-major ordering.. i.e for the following matrix, the input data expected is
4094     as shown:
4095 
4096         1 0 0
4097         2 0 3     P0
4098        -------
4099         4 5 6     P1
4100 
4101      Process0 [P0]: rows_owned=[0,1]
4102         i =  {0,1,3}  [size = nrow+1  = 2+1]
4103         j =  {0,0,2}  [size = nz = 6]
4104         v =  {1,2,3}  [size = nz = 6]
4105 
4106      Process1 [P1]: rows_owned=[2]
4107         i =  {0,3}    [size = nrow+1  = 1+1]
4108         j =  {0,1,2}  [size = nz = 6]
4109         v =  {4,5,6}  [size = nz = 6]
4110 
4111 .keywords: matrix, aij, compressed row, sparse, parallel
4112 
4113 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4114           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4115 @*/
4116 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4117 {
4118   PetscErrorCode ierr;
4119 
4120   PetscFunctionBegin;
4121   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4122   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4123   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4124   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4125   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4126   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4127   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4128   PetscFunctionReturn(0);
4129 }
4130 
4131 #undef __FUNCT__
4132 #define __FUNCT__ "MatCreateAIJ"
4133 /*@C
4134    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4135    (the default parallel PETSc format).  For good matrix assembly performance
4136    the user should preallocate the matrix storage by setting the parameters
4137    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4138    performance can be increased by more than a factor of 50.
4139 
4140    Collective on MPI_Comm
4141 
4142    Input Parameters:
4143 +  comm - MPI communicator
4144 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4145            This value should be the same as the local size used in creating the
4146            y vector for the matrix-vector product y = Ax.
4147 .  n - This value should be the same as the local size used in creating the
4148        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4149        calculated if N is given) For square matrices n is almost always m.
4150 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4151 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4152 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4153            (same value is used for all local rows)
4154 .  d_nnz - array containing the number of nonzeros in the various rows of the
4155            DIAGONAL portion of the local submatrix (possibly different for each row)
4156            or NULL, if d_nz is used to specify the nonzero structure.
4157            The size of this array is equal to the number of local rows, i.e 'm'.
4158 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4159            submatrix (same value is used for all local rows).
4160 -  o_nnz - array containing the number of nonzeros in the various rows of the
4161            OFF-DIAGONAL portion of the local submatrix (possibly different for
4162            each row) or NULL, if o_nz is used to specify the nonzero
4163            structure. The size of this array is equal to the number
4164            of local rows, i.e 'm'.
4165 
4166    Output Parameter:
4167 .  A - the matrix
4168 
4169    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4170    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4171    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4172 
4173    Notes:
4174    If the *_nnz parameter is given then the *_nz parameter is ignored
4175 
4176    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4177    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4178    storage requirements for this matrix.
4179 
4180    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4181    processor than it must be used on all processors that share the object for
4182    that argument.
4183 
4184    The user MUST specify either the local or global matrix dimensions
4185    (possibly both).
4186 
4187    The parallel matrix is partitioned across processors such that the
4188    first m0 rows belong to process 0, the next m1 rows belong to
4189    process 1, the next m2 rows belong to process 2 etc.. where
4190    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4191    values corresponding to [m x N] submatrix.
4192 
4193    The columns are logically partitioned with the n0 columns belonging
4194    to 0th partition, the next n1 columns belonging to the next
4195    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4196 
4197    The DIAGONAL portion of the local submatrix on any given processor
4198    is the submatrix corresponding to the rows and columns m,n
4199    corresponding to the given processor. i.e diagonal matrix on
4200    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4201    etc. The remaining portion of the local submatrix [m x (N-n)]
4202    constitute the OFF-DIAGONAL portion. The example below better
4203    illustrates this concept.
4204 
4205    For a square global matrix we define each processor's diagonal portion
4206    to be its local rows and the corresponding columns (a square submatrix);
4207    each processor's off-diagonal portion encompasses the remainder of the
4208    local matrix (a rectangular submatrix).
4209 
4210    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4211 
4212    When calling this routine with a single process communicator, a matrix of
4213    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4214    type of communicator, use the construction mechanism:
4215      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4216 
4217    By default, this format uses inodes (identical nodes) when possible.
4218    We search for consecutive rows with the same nonzero structure, thereby
4219    reusing matrix information to achieve increased efficiency.
4220 
4221    Options Database Keys:
4222 +  -mat_no_inode  - Do not use inodes
4223 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4224 -  -mat_aij_oneindex - Internally use indexing starting at 1
4225         rather than 0.  Note that when calling MatSetValues(),
4226         the user still MUST index entries starting at 0!
4227 
4228 
4229    Example usage:
4230 
4231    Consider the following 8x8 matrix with 34 non-zero values, that is
4232    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4233    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4234    as follows:
4235 
4236 .vb
4237             1  2  0  |  0  3  0  |  0  4
4238     Proc0   0  5  6  |  7  0  0  |  8  0
4239             9  0 10  | 11  0  0  | 12  0
4240     -------------------------------------
4241            13  0 14  | 15 16 17  |  0  0
4242     Proc1   0 18  0  | 19 20 21  |  0  0
4243             0  0  0  | 22 23  0  | 24  0
4244     -------------------------------------
4245     Proc2  25 26 27  |  0  0 28  | 29  0
4246            30  0  0  | 31 32 33  |  0 34
4247 .ve
4248 
4249    This can be represented as a collection of submatrices as:
4250 
4251 .vb
4252       A B C
4253       D E F
4254       G H I
4255 .ve
4256 
4257    Where the submatrices A,B,C are owned by proc0, D,E,F are
4258    owned by proc1, G,H,I are owned by proc2.
4259 
4260    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4261    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4262    The 'M','N' parameters are 8,8, and have the same values on all procs.
4263 
4264    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4265    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4266    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4267    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4268    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4269    matrix, ans [DF] as another SeqAIJ matrix.
4270 
4271    When d_nz, o_nz parameters are specified, d_nz storage elements are
4272    allocated for every row of the local diagonal submatrix, and o_nz
4273    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4274    One way to choose d_nz and o_nz is to use the max nonzerors per local
4275    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4276    In this case, the values of d_nz,o_nz are:
4277 .vb
4278      proc0 : dnz = 2, o_nz = 2
4279      proc1 : dnz = 3, o_nz = 2
4280      proc2 : dnz = 1, o_nz = 4
4281 .ve
4282    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4283    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4284    for proc3. i.e we are using 12+15+10=37 storage locations to store
4285    34 values.
4286 
4287    When d_nnz, o_nnz parameters are specified, the storage is specified
4288    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4289    In the above case the values for d_nnz,o_nnz are:
4290 .vb
4291      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4292      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4293      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4294 .ve
4295    Here the space allocated is sum of all the above values i.e 34, and
4296    hence pre-allocation is perfect.
4297 
4298    Level: intermediate
4299 
4300 .keywords: matrix, aij, compressed row, sparse, parallel
4301 
4302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4303           MPIAIJ, MatCreateMPIAIJWithArrays()
4304 @*/
4305 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4306 {
4307   PetscErrorCode ierr;
4308   PetscMPIInt    size;
4309 
4310   PetscFunctionBegin;
4311   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4312   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4313   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4314   if (size > 1) {
4315     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4316     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4317   } else {
4318     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4319     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4320   }
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4326 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4327 {
4328   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4329 
4330   PetscFunctionBegin;
4331   *Ad     = a->A;
4332   *Ao     = a->B;
4333   *colmap = a->garray;
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4339 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4340 {
4341   PetscErrorCode ierr;
4342   PetscInt       i;
4343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4344 
4345   PetscFunctionBegin;
4346   if (coloring->ctype == IS_COLORING_GLOBAL) {
4347     ISColoringValue *allcolors,*colors;
4348     ISColoring      ocoloring;
4349 
4350     /* set coloring for diagonal portion */
4351     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4352 
4353     /* set coloring for off-diagonal portion */
4354     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4355     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4356     for (i=0; i<a->B->cmap->n; i++) {
4357       colors[i] = allcolors[a->garray[i]];
4358     }
4359     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4360     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4361     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4362     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4363   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4364     ISColoringValue *colors;
4365     PetscInt        *larray;
4366     ISColoring      ocoloring;
4367 
4368     /* set coloring for diagonal portion */
4369     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4370     for (i=0; i<a->A->cmap->n; i++) {
4371       larray[i] = i + A->cmap->rstart;
4372     }
4373     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4374     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4375     for (i=0; i<a->A->cmap->n; i++) {
4376       colors[i] = coloring->colors[larray[i]];
4377     }
4378     ierr = PetscFree(larray);CHKERRQ(ierr);
4379     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4380     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4381     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4382 
4383     /* set coloring for off-diagonal portion */
4384     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4385     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4386     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4387     for (i=0; i<a->B->cmap->n; i++) {
4388       colors[i] = coloring->colors[larray[i]];
4389     }
4390     ierr = PetscFree(larray);CHKERRQ(ierr);
4391     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4392     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4393     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4394   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4400 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4401 {
4402   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4403   PetscErrorCode ierr;
4404 
4405   PetscFunctionBegin;
4406   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4407   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4408   PetscFunctionReturn(0);
4409 }
4410 
4411 #undef __FUNCT__
4412 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4413 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4414 {
4415   PetscErrorCode ierr;
4416   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4417   PetscInt       *indx;
4418 
4419   PetscFunctionBegin;
4420   /* This routine will ONLY return MPIAIJ type matrix */
4421   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4422   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4423   if (n == PETSC_DECIDE) {
4424     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4425   }
4426   /* Check sum(n) = N */
4427   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4428   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4429 
4430   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4431   rstart -= m;
4432 
4433   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4434   for (i=0; i<m; i++) {
4435     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4436     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4437     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4438   }
4439 
4440   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4441   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4442   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4443   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4444   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4445   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 #undef __FUNCT__
4450 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4451 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4452 {
4453   PetscErrorCode ierr;
4454   PetscInt       m,N,i,rstart,nnz,Ii;
4455   PetscInt       *indx;
4456   PetscScalar    *values;
4457 
4458   PetscFunctionBegin;
4459   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4460   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4461   for (i=0; i<m; i++) {
4462     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4463     Ii   = i + rstart;
4464     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4465     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4466   }
4467   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4474 /*@
4475       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4476                  matrices from each processor
4477 
4478     Collective on MPI_Comm
4479 
4480    Input Parameters:
4481 +    comm - the communicators the parallel matrix will live on
4482 .    inmat - the input sequential matrices
4483 .    n - number of local columns (or PETSC_DECIDE)
4484 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4485 
4486    Output Parameter:
4487 .    outmat - the parallel matrix generated
4488 
4489     Level: advanced
4490 
4491    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4492 
4493 @*/
4494 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4495 {
4496   PetscErrorCode ierr;
4497 
4498   PetscFunctionBegin;
4499   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4500   if (scall == MAT_INITIAL_MATRIX) {
4501     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4502   }
4503   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4504   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #undef __FUNCT__
4509 #define __FUNCT__ "MatFileSplit"
4510 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4511 {
4512   PetscErrorCode    ierr;
4513   PetscMPIInt       rank;
4514   PetscInt          m,N,i,rstart,nnz;
4515   size_t            len;
4516   const PetscInt    *indx;
4517   PetscViewer       out;
4518   char              *name;
4519   Mat               B;
4520   const PetscScalar *values;
4521 
4522   PetscFunctionBegin;
4523   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4524   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4525   /* Should this be the type of the diagonal block of A? */
4526   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4527   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4528   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4529   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4530   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4531   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4532   for (i=0; i<m; i++) {
4533     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4534     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4535     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4536   }
4537   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4538   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4539 
4540   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4541   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4542   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4543   sprintf(name,"%s.%d",outfile,rank);
4544   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4545   ierr = PetscFree(name);CHKERRQ(ierr);
4546   ierr = MatView(B,out);CHKERRQ(ierr);
4547   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4548   ierr = MatDestroy(&B);CHKERRQ(ierr);
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4553 #undef __FUNCT__
4554 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4555 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4556 {
4557   PetscErrorCode      ierr;
4558   Mat_Merge_SeqsToMPI *merge;
4559   PetscContainer      container;
4560 
4561   PetscFunctionBegin;
4562   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4563   if (container) {
4564     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4577     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4578     ierr = PetscFree(merge);CHKERRQ(ierr);
4579     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4580   }
4581   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #include <../src/mat/utils/freespace.h>
4586 #include <petscbt.h>
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4590 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4591 {
4592   PetscErrorCode      ierr;
4593   MPI_Comm            comm;
4594   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4595   PetscMPIInt         size,rank,taga,*len_s;
4596   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4597   PetscInt            proc,m;
4598   PetscInt            **buf_ri,**buf_rj;
4599   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4600   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4601   MPI_Request         *s_waits,*r_waits;
4602   MPI_Status          *status;
4603   MatScalar           *aa=a->a;
4604   MatScalar           **abuf_r,*ba_i;
4605   Mat_Merge_SeqsToMPI *merge;
4606   PetscContainer      container;
4607 
4608   PetscFunctionBegin;
4609   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4610   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4611 
4612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4613   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4614 
4615   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4616   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4617 
4618   bi     = merge->bi;
4619   bj     = merge->bj;
4620   buf_ri = merge->buf_ri;
4621   buf_rj = merge->buf_rj;
4622 
4623   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4624   owners = merge->rowmap->range;
4625   len_s  = merge->len_s;
4626 
4627   /* send and recv matrix values */
4628   /*-----------------------------*/
4629   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4630   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4631 
4632   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4633   for (proc=0,k=0; proc<size; proc++) {
4634     if (!len_s[proc]) continue;
4635     i    = owners[proc];
4636     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4637     k++;
4638   }
4639 
4640   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4641   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4642   ierr = PetscFree(status);CHKERRQ(ierr);
4643 
4644   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4645   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4646 
4647   /* insert mat values of mpimat */
4648   /*----------------------------*/
4649   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4650   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4651 
4652   for (k=0; k<merge->nrecv; k++) {
4653     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4654     nrows       = *(buf_ri_k[k]);
4655     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4656     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4657   }
4658 
4659   /* set values of ba */
4660   m = merge->rowmap->n;
4661   for (i=0; i<m; i++) {
4662     arow = owners[rank] + i;
4663     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4664     bnzi = bi[i+1] - bi[i];
4665     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4666 
4667     /* add local non-zero vals of this proc's seqmat into ba */
4668     anzi   = ai[arow+1] - ai[arow];
4669     aj     = a->j + ai[arow];
4670     aa     = a->a + ai[arow];
4671     nextaj = 0;
4672     for (j=0; nextaj<anzi; j++) {
4673       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4674         ba_i[j] += aa[nextaj++];
4675       }
4676     }
4677 
4678     /* add received vals into ba */
4679     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4680       /* i-th row */
4681       if (i == *nextrow[k]) {
4682         anzi   = *(nextai[k]+1) - *nextai[k];
4683         aj     = buf_rj[k] + *(nextai[k]);
4684         aa     = abuf_r[k] + *(nextai[k]);
4685         nextaj = 0;
4686         for (j=0; nextaj<anzi; j++) {
4687           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4688             ba_i[j] += aa[nextaj++];
4689           }
4690         }
4691         nextrow[k]++; nextai[k]++;
4692       }
4693     }
4694     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4695   }
4696   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4697   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698 
4699   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4700   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4701   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4702   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4703   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4704   PetscFunctionReturn(0);
4705 }
4706 
4707 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4708 
4709 #undef __FUNCT__
4710 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4711 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4712 {
4713   PetscErrorCode      ierr;
4714   Mat                 B_mpi;
4715   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4716   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4717   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4718   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4719   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4720   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4721   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4722   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4723   MPI_Status          *status;
4724   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4725   PetscBT             lnkbt;
4726   Mat_Merge_SeqsToMPI *merge;
4727   PetscContainer      container;
4728 
4729   PetscFunctionBegin;
4730   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4731 
4732   /* make sure it is a PETSc comm */
4733   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4734   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4735   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4736 
4737   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4738   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4739 
4740   /* determine row ownership */
4741   /*---------------------------------------------------------*/
4742   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4744   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4745   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4746   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4747   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4748   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4749 
4750   m      = merge->rowmap->n;
4751   owners = merge->rowmap->range;
4752 
4753   /* determine the number of messages to send, their lengths */
4754   /*---------------------------------------------------------*/
4755   len_s = merge->len_s;
4756 
4757   len          = 0; /* length of buf_si[] */
4758   merge->nsend = 0;
4759   for (proc=0; proc<size; proc++) {
4760     len_si[proc] = 0;
4761     if (proc == rank) {
4762       len_s[proc] = 0;
4763     } else {
4764       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4765       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4766     }
4767     if (len_s[proc]) {
4768       merge->nsend++;
4769       nrows = 0;
4770       for (i=owners[proc]; i<owners[proc+1]; i++) {
4771         if (ai[i+1] > ai[i]) nrows++;
4772       }
4773       len_si[proc] = 2*(nrows+1);
4774       len         += len_si[proc];
4775     }
4776   }
4777 
4778   /* determine the number and length of messages to receive for ij-structure */
4779   /*-------------------------------------------------------------------------*/
4780   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4781   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4782 
4783   /* post the Irecv of j-structure */
4784   /*-------------------------------*/
4785   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4786   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4787 
4788   /* post the Isend of j-structure */
4789   /*--------------------------------*/
4790   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4791 
4792   for (proc=0, k=0; proc<size; proc++) {
4793     if (!len_s[proc]) continue;
4794     i    = owners[proc];
4795     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4796     k++;
4797   }
4798 
4799   /* receives and sends of j-structure are complete */
4800   /*------------------------------------------------*/
4801   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4802   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4803 
4804   /* send and recv i-structure */
4805   /*---------------------------*/
4806   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4807   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4808 
4809   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4810   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4811   for (proc=0,k=0; proc<size; proc++) {
4812     if (!len_s[proc]) continue;
4813     /* form outgoing message for i-structure:
4814          buf_si[0]:                 nrows to be sent
4815                [1:nrows]:           row index (global)
4816                [nrows+1:2*nrows+1]: i-structure index
4817     */
4818     /*-------------------------------------------*/
4819     nrows       = len_si[proc]/2 - 1;
4820     buf_si_i    = buf_si + nrows+1;
4821     buf_si[0]   = nrows;
4822     buf_si_i[0] = 0;
4823     nrows       = 0;
4824     for (i=owners[proc]; i<owners[proc+1]; i++) {
4825       anzi = ai[i+1] - ai[i];
4826       if (anzi) {
4827         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4828         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4829         nrows++;
4830       }
4831     }
4832     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4833     k++;
4834     buf_si += len_si[proc];
4835   }
4836 
4837   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4838   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4839 
4840   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4841   for (i=0; i<merge->nrecv; i++) {
4842     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4843   }
4844 
4845   ierr = PetscFree(len_si);CHKERRQ(ierr);
4846   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4847   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4848   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4849   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4850   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4851   ierr = PetscFree(status);CHKERRQ(ierr);
4852 
4853   /* compute a local seq matrix in each processor */
4854   /*----------------------------------------------*/
4855   /* allocate bi array and free space for accumulating nonzero column info */
4856   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4857   bi[0] = 0;
4858 
4859   /* create and initialize a linked list */
4860   nlnk = N+1;
4861   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4862 
4863   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4864   len  = ai[owners[rank+1]] - ai[owners[rank]];
4865   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4866 
4867   current_space = free_space;
4868 
4869   /* determine symbolic info for each local row */
4870   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4871 
4872   for (k=0; k<merge->nrecv; k++) {
4873     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4874     nrows       = *buf_ri_k[k];
4875     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4876     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4877   }
4878 
4879   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4880   len  = 0;
4881   for (i=0; i<m; i++) {
4882     bnzi = 0;
4883     /* add local non-zero cols of this proc's seqmat into lnk */
4884     arow  = owners[rank] + i;
4885     anzi  = ai[arow+1] - ai[arow];
4886     aj    = a->j + ai[arow];
4887     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4888     bnzi += nlnk;
4889     /* add received col data into lnk */
4890     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4891       if (i == *nextrow[k]) { /* i-th row */
4892         anzi  = *(nextai[k]+1) - *nextai[k];
4893         aj    = buf_rj[k] + *nextai[k];
4894         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4895         bnzi += nlnk;
4896         nextrow[k]++; nextai[k]++;
4897       }
4898     }
4899     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4900 
4901     /* if free space is not available, make more free space */
4902     if (current_space->local_remaining<bnzi) {
4903       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4904       nspacedouble++;
4905     }
4906     /* copy data into free space, then initialize lnk */
4907     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4908     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4909 
4910     current_space->array           += bnzi;
4911     current_space->local_used      += bnzi;
4912     current_space->local_remaining -= bnzi;
4913 
4914     bi[i+1] = bi[i] + bnzi;
4915   }
4916 
4917   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4918 
4919   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4920   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4921   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4922 
4923   /* create symbolic parallel matrix B_mpi */
4924   /*---------------------------------------*/
4925   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4926   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4927   if (n==PETSC_DECIDE) {
4928     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4929   } else {
4930     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4931   }
4932   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4933   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4934   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4935   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4936   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4937 
4938   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4939   B_mpi->assembled    = PETSC_FALSE;
4940   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4941   merge->bi           = bi;
4942   merge->bj           = bj;
4943   merge->buf_ri       = buf_ri;
4944   merge->buf_rj       = buf_rj;
4945   merge->coi          = NULL;
4946   merge->coj          = NULL;
4947   merge->owners_co    = NULL;
4948 
4949   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4950 
4951   /* attach the supporting struct to B_mpi for reuse */
4952   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4953   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4954   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4955   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4956   *mpimat = B_mpi;
4957 
4958   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4964 /*@C
4965       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4966                  matrices from each processor
4967 
4968     Collective on MPI_Comm
4969 
4970    Input Parameters:
4971 +    comm - the communicators the parallel matrix will live on
4972 .    seqmat - the input sequential matrices
4973 .    m - number of local rows (or PETSC_DECIDE)
4974 .    n - number of local columns (or PETSC_DECIDE)
4975 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4976 
4977    Output Parameter:
4978 .    mpimat - the parallel matrix generated
4979 
4980     Level: advanced
4981 
4982    Notes:
4983      The dimensions of the sequential matrix in each processor MUST be the same.
4984      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4985      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4986 @*/
4987 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4988 {
4989   PetscErrorCode ierr;
4990   PetscMPIInt    size;
4991 
4992   PetscFunctionBegin;
4993   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4994   if (size == 1) {
4995     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4996     if (scall == MAT_INITIAL_MATRIX) {
4997       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4998     } else {
4999       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5000     }
5001     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5002     PetscFunctionReturn(0);
5003   }
5004   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   if (scall == MAT_INITIAL_MATRIX) {
5006     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5007   }
5008   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5009   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5010   PetscFunctionReturn(0);
5011 }
5012 
5013 #undef __FUNCT__
5014 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5015 /*@
5016      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5017           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5018           with MatGetSize()
5019 
5020     Not Collective
5021 
5022    Input Parameters:
5023 +    A - the matrix
5024 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5025 
5026    Output Parameter:
5027 .    A_loc - the local sequential matrix generated
5028 
5029     Level: developer
5030 
5031 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5032 
5033 @*/
5034 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5035 {
5036   PetscErrorCode ierr;
5037   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5038   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5039   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5040   MatScalar      *aa=a->a,*ba=b->a,*cam;
5041   PetscScalar    *ca;
5042   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5043   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5044   PetscBool      match;
5045 
5046   PetscFunctionBegin;
5047   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5048   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5049   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5050   if (scall == MAT_INITIAL_MATRIX) {
5051     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5052     ci[0] = 0;
5053     for (i=0; i<am; i++) {
5054       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5055     }
5056     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5057     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5058     k    = 0;
5059     for (i=0; i<am; i++) {
5060       ncols_o = bi[i+1] - bi[i];
5061       ncols_d = ai[i+1] - ai[i];
5062       /* off-diagonal portion of A */
5063       for (jo=0; jo<ncols_o; jo++) {
5064         col = cmap[*bj];
5065         if (col >= cstart) break;
5066         cj[k]   = col; bj++;
5067         ca[k++] = *ba++;
5068       }
5069       /* diagonal portion of A */
5070       for (j=0; j<ncols_d; j++) {
5071         cj[k]   = cstart + *aj++;
5072         ca[k++] = *aa++;
5073       }
5074       /* off-diagonal portion of A */
5075       for (j=jo; j<ncols_o; j++) {
5076         cj[k]   = cmap[*bj++];
5077         ca[k++] = *ba++;
5078       }
5079     }
5080     /* put together the new matrix */
5081     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5082     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5083     /* Since these are PETSc arrays, change flags to free them as necessary. */
5084     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5085     mat->free_a  = PETSC_TRUE;
5086     mat->free_ij = PETSC_TRUE;
5087     mat->nonew   = 0;
5088   } else if (scall == MAT_REUSE_MATRIX) {
5089     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5090     ci = mat->i; cj = mat->j; cam = mat->a;
5091     for (i=0; i<am; i++) {
5092       /* off-diagonal portion of A */
5093       ncols_o = bi[i+1] - bi[i];
5094       for (jo=0; jo<ncols_o; jo++) {
5095         col = cmap[*bj];
5096         if (col >= cstart) break;
5097         *cam++ = *ba++; bj++;
5098       }
5099       /* diagonal portion of A */
5100       ncols_d = ai[i+1] - ai[i];
5101       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5102       /* off-diagonal portion of A */
5103       for (j=jo; j<ncols_o; j++) {
5104         *cam++ = *ba++; bj++;
5105       }
5106     }
5107   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5108   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 #undef __FUNCT__
5113 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5114 /*@C
5115      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5116 
5117     Not Collective
5118 
5119    Input Parameters:
5120 +    A - the matrix
5121 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5122 -    row, col - index sets of rows and columns to extract (or NULL)
5123 
5124    Output Parameter:
5125 .    A_loc - the local sequential matrix generated
5126 
5127     Level: developer
5128 
5129 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5130 
5131 @*/
5132 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5133 {
5134   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5135   PetscErrorCode ierr;
5136   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5137   IS             isrowa,iscola;
5138   Mat            *aloc;
5139   PetscBool      match;
5140 
5141   PetscFunctionBegin;
5142   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5143   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5144   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5145   if (!row) {
5146     start = A->rmap->rstart; end = A->rmap->rend;
5147     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5148   } else {
5149     isrowa = *row;
5150   }
5151   if (!col) {
5152     start = A->cmap->rstart;
5153     cmap  = a->garray;
5154     nzA   = a->A->cmap->n;
5155     nzB   = a->B->cmap->n;
5156     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5157     ncols = 0;
5158     for (i=0; i<nzB; i++) {
5159       if (cmap[i] < start) idx[ncols++] = cmap[i];
5160       else break;
5161     }
5162     imark = i;
5163     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5164     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5165     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5166   } else {
5167     iscola = *col;
5168   }
5169   if (scall != MAT_INITIAL_MATRIX) {
5170     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5171     aloc[0] = *A_loc;
5172   }
5173   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5174   *A_loc = aloc[0];
5175   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5176   if (!row) {
5177     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5178   }
5179   if (!col) {
5180     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5181   }
5182   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatGetBrowsOfAcols"
5188 /*@C
5189     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5190 
5191     Collective on Mat
5192 
5193    Input Parameters:
5194 +    A,B - the matrices in mpiaij format
5195 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5197 
5198    Output Parameter:
5199 +    rowb, colb - index sets of rows and columns of B to extract
5200 -    B_seq - the sequential matrix generated
5201 
5202     Level: developer
5203 
5204 @*/
5205 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5206 {
5207   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5208   PetscErrorCode ierr;
5209   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5210   IS             isrowb,iscolb;
5211   Mat            *bseq=NULL;
5212 
5213   PetscFunctionBegin;
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5218 
5219   if (scall == MAT_INITIAL_MATRIX) {
5220     start = A->cmap->rstart;
5221     cmap  = a->garray;
5222     nzA   = a->A->cmap->n;
5223     nzB   = a->B->cmap->n;
5224     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5225     ncols = 0;
5226     for (i=0; i<nzB; i++) {  /* row < local row index */
5227       if (cmap[i] < start) idx[ncols++] = cmap[i];
5228       else break;
5229     }
5230     imark = i;
5231     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5232     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5233     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5234     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5235   } else {
5236     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5237     isrowb  = *rowb; iscolb = *colb;
5238     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5239     bseq[0] = *B_seq;
5240   }
5241   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5242   *B_seq = bseq[0];
5243   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5244   if (!rowb) {
5245     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5246   } else {
5247     *rowb = isrowb;
5248   }
5249   if (!colb) {
5250     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5251   } else {
5252     *colb = iscolb;
5253   }
5254   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5255   PetscFunctionReturn(0);
5256 }
5257 
5258 #undef __FUNCT__
5259 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5260 /*
5261     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5262     of the OFF-DIAGONAL portion of local A
5263 
5264     Collective on Mat
5265 
5266    Input Parameters:
5267 +    A,B - the matrices in mpiaij format
5268 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5269 
5270    Output Parameter:
5271 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5272 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5273 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5274 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5275 
5276     Level: developer
5277 
5278 */
5279 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5280 {
5281   VecScatter_MPI_General *gen_to,*gen_from;
5282   PetscErrorCode         ierr;
5283   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5284   Mat_SeqAIJ             *b_oth;
5285   VecScatter             ctx =a->Mvctx;
5286   MPI_Comm               comm;
5287   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5288   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5289   PetscScalar            *rvalues,*svalues;
5290   MatScalar              *b_otha,*bufa,*bufA;
5291   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5292   MPI_Request            *rwaits = NULL,*swaits = NULL;
5293   MPI_Status             *sstatus,rstatus;
5294   PetscMPIInt            jj;
5295   PetscInt               *cols,sbs,rbs;
5296   PetscScalar            *vals;
5297 
5298   PetscFunctionBegin;
5299   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5300   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5301     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5302   }
5303   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5304   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5305 
5306   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5307   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5308   rvalues  = gen_from->values; /* holds the length of receiving row */
5309   svalues  = gen_to->values;   /* holds the length of sending row */
5310   nrecvs   = gen_from->n;
5311   nsends   = gen_to->n;
5312 
5313   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5314   srow    = gen_to->indices;    /* local row index to be sent */
5315   sstarts = gen_to->starts;
5316   sprocs  = gen_to->procs;
5317   sstatus = gen_to->sstatus;
5318   sbs     = gen_to->bs;
5319   rstarts = gen_from->starts;
5320   rprocs  = gen_from->procs;
5321   rbs     = gen_from->bs;
5322 
5323   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5324   if (scall == MAT_INITIAL_MATRIX) {
5325     /* i-array */
5326     /*---------*/
5327     /*  post receives */
5328     for (i=0; i<nrecvs; i++) {
5329       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5330       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5331       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5332     }
5333 
5334     /* pack the outgoing message */
5335     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5336 
5337     sstartsj[0] = 0;
5338     rstartsj[0] = 0;
5339     len         = 0; /* total length of j or a array to be sent */
5340     k           = 0;
5341     for (i=0; i<nsends; i++) {
5342       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5343       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5344       for (j=0; j<nrows; j++) {
5345         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5346         for (l=0; l<sbs; l++) {
5347           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5348 
5349           rowlen[j*sbs+l] = ncols;
5350 
5351           len += ncols;
5352           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5353         }
5354         k++;
5355       }
5356       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5357 
5358       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5359     }
5360     /* recvs and sends of i-array are completed */
5361     i = nrecvs;
5362     while (i--) {
5363       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5364     }
5365     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5366 
5367     /* allocate buffers for sending j and a arrays */
5368     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5369     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5370 
5371     /* create i-array of B_oth */
5372     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5373 
5374     b_othi[0] = 0;
5375     len       = 0; /* total length of j or a array to be received */
5376     k         = 0;
5377     for (i=0; i<nrecvs; i++) {
5378       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5379       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5380       for (j=0; j<nrows; j++) {
5381         b_othi[k+1] = b_othi[k] + rowlen[j];
5382         len        += rowlen[j]; k++;
5383       }
5384       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5385     }
5386 
5387     /* allocate space for j and a arrrays of B_oth */
5388     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5389     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5390 
5391     /* j-array */
5392     /*---------*/
5393     /*  post receives of j-array */
5394     for (i=0; i<nrecvs; i++) {
5395       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5396       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5397     }
5398 
5399     /* pack the outgoing message j-array */
5400     k = 0;
5401     for (i=0; i<nsends; i++) {
5402       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5403       bufJ  = bufj+sstartsj[i];
5404       for (j=0; j<nrows; j++) {
5405         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5406         for (ll=0; ll<sbs; ll++) {
5407           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5408           for (l=0; l<ncols; l++) {
5409             *bufJ++ = cols[l];
5410           }
5411           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5412         }
5413       }
5414       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5415     }
5416 
5417     /* recvs and sends of j-array are completed */
5418     i = nrecvs;
5419     while (i--) {
5420       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5421     }
5422     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5423   } else if (scall == MAT_REUSE_MATRIX) {
5424     sstartsj = *startsj_s;
5425     rstartsj = *startsj_r;
5426     bufa     = *bufa_ptr;
5427     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5428     b_otha   = b_oth->a;
5429   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5430 
5431   /* a-array */
5432   /*---------*/
5433   /*  post receives of a-array */
5434   for (i=0; i<nrecvs; i++) {
5435     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5436     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5437   }
5438 
5439   /* pack the outgoing message a-array */
5440   k = 0;
5441   for (i=0; i<nsends; i++) {
5442     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5443     bufA  = bufa+sstartsj[i];
5444     for (j=0; j<nrows; j++) {
5445       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5446       for (ll=0; ll<sbs; ll++) {
5447         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5448         for (l=0; l<ncols; l++) {
5449           *bufA++ = vals[l];
5450         }
5451         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5452       }
5453     }
5454     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5455   }
5456   /* recvs and sends of a-array are completed */
5457   i = nrecvs;
5458   while (i--) {
5459     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5460   }
5461   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5462   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5463 
5464   if (scall == MAT_INITIAL_MATRIX) {
5465     /* put together the new matrix */
5466     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5467 
5468     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5469     /* Since these are PETSc arrays, change flags to free them as necessary. */
5470     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5471     b_oth->free_a  = PETSC_TRUE;
5472     b_oth->free_ij = PETSC_TRUE;
5473     b_oth->nonew   = 0;
5474 
5475     ierr = PetscFree(bufj);CHKERRQ(ierr);
5476     if (!startsj_s || !bufa_ptr) {
5477       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5478       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5479     } else {
5480       *startsj_s = sstartsj;
5481       *startsj_r = rstartsj;
5482       *bufa_ptr  = bufa;
5483     }
5484   }
5485   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 #undef __FUNCT__
5490 #define __FUNCT__ "MatGetCommunicationStructs"
5491 /*@C
5492   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5493 
5494   Not Collective
5495 
5496   Input Parameters:
5497 . A - The matrix in mpiaij format
5498 
5499   Output Parameter:
5500 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5501 . colmap - A map from global column index to local index into lvec
5502 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5503 
5504   Level: developer
5505 
5506 @*/
5507 #if defined(PETSC_USE_CTABLE)
5508 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5509 #else
5510 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5511 #endif
5512 {
5513   Mat_MPIAIJ *a;
5514 
5515   PetscFunctionBegin;
5516   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5517   PetscValidPointer(lvec, 2);
5518   PetscValidPointer(colmap, 3);
5519   PetscValidPointer(multScatter, 4);
5520   a = (Mat_MPIAIJ*) A->data;
5521   if (lvec) *lvec = a->lvec;
5522   if (colmap) *colmap = a->colmap;
5523   if (multScatter) *multScatter = a->Mvctx;
5524   PetscFunctionReturn(0);
5525 }
5526 
5527 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5528 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5529 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5530 
5531 #undef __FUNCT__
5532 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5533 /*
5534     Computes (B'*A')' since computing B*A directly is untenable
5535 
5536                n                       p                          p
5537         (              )       (              )         (                  )
5538       m (      A       )  *  n (       B      )   =   m (         C        )
5539         (              )       (              )         (                  )
5540 
5541 */
5542 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5543 {
5544   PetscErrorCode ierr;
5545   Mat            At,Bt,Ct;
5546 
5547   PetscFunctionBegin;
5548   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5549   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5550   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5551   ierr = MatDestroy(&At);CHKERRQ(ierr);
5552   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5553   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5554   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5555   PetscFunctionReturn(0);
5556 }
5557 
5558 #undef __FUNCT__
5559 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5560 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5561 {
5562   PetscErrorCode ierr;
5563   PetscInt       m=A->rmap->n,n=B->cmap->n;
5564   Mat            Cmat;
5565 
5566   PetscFunctionBegin;
5567   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5569   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5570   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5571   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5572   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5573   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5574   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5575 
5576   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5577 
5578   *C = Cmat;
5579   PetscFunctionReturn(0);
5580 }
5581 
5582 /* ----------------------------------------------------------------*/
5583 #undef __FUNCT__
5584 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5585 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5586 {
5587   PetscErrorCode ierr;
5588 
5589   PetscFunctionBegin;
5590   if (scall == MAT_INITIAL_MATRIX) {
5591     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5592     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5593     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5594   }
5595   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5596   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5597   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 #if defined(PETSC_HAVE_MUMPS)
5602 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5603 #endif
5604 #if defined(PETSC_HAVE_PASTIX)
5605 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5606 #endif
5607 #if defined(PETSC_HAVE_SUPERLU_DIST)
5608 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5609 #endif
5610 #if defined(PETSC_HAVE_CLIQUE)
5611 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5612 #endif
5613 
5614 /*MC
5615    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5616 
5617    Options Database Keys:
5618 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5619 
5620   Level: beginner
5621 
5622 .seealso: MatCreateAIJ()
5623 M*/
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "MatCreate_MPIAIJ"
5627 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5628 {
5629   Mat_MPIAIJ     *b;
5630   PetscErrorCode ierr;
5631   PetscMPIInt    size;
5632 
5633   PetscFunctionBegin;
5634   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5635 
5636   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5637   B->data       = (void*)b;
5638   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5639   B->assembled  = PETSC_FALSE;
5640   B->insertmode = NOT_SET_VALUES;
5641   b->size       = size;
5642 
5643   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5644 
5645   /* build cache for off array entries formed */
5646   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5647 
5648   b->donotstash  = PETSC_FALSE;
5649   b->colmap      = 0;
5650   b->garray      = 0;
5651   b->roworiented = PETSC_TRUE;
5652 
5653   /* stuff used for matrix vector multiply */
5654   b->lvec  = NULL;
5655   b->Mvctx = NULL;
5656 
5657   /* stuff for MatGetRow() */
5658   b->rowindices   = 0;
5659   b->rowvalues    = 0;
5660   b->getrowactive = PETSC_FALSE;
5661 
5662   /* flexible pointer used in CUSP/CUSPARSE classes */
5663   b->spptr = NULL;
5664 
5665 #if defined(PETSC_HAVE_MUMPS)
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5667 #endif
5668 #if defined(PETSC_HAVE_PASTIX)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5670 #endif
5671 #if defined(PETSC_HAVE_SUPERLU_DIST)
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5673 #endif
5674 #if defined(PETSC_HAVE_CLIQUE)
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5676 #endif
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5691   PetscFunctionReturn(0);
5692 }
5693 
5694 #undef __FUNCT__
5695 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5696 /*@
5697      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5698          and "off-diagonal" part of the matrix in CSR format.
5699 
5700    Collective on MPI_Comm
5701 
5702    Input Parameters:
5703 +  comm - MPI communicator
5704 .  m - number of local rows (Cannot be PETSC_DECIDE)
5705 .  n - This value should be the same as the local size used in creating the
5706        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5707        calculated if N is given) For square matrices n is almost always m.
5708 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5709 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5710 .   i - row indices for "diagonal" portion of matrix
5711 .   j - column indices
5712 .   a - matrix values
5713 .   oi - row indices for "off-diagonal" portion of matrix
5714 .   oj - column indices
5715 -   oa - matrix values
5716 
5717    Output Parameter:
5718 .   mat - the matrix
5719 
5720    Level: advanced
5721 
5722    Notes:
5723        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5724        must free the arrays once the matrix has been destroyed and not before.
5725 
5726        The i and j indices are 0 based
5727 
5728        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5729 
5730        This sets local rows and cannot be used to set off-processor values.
5731 
5732        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5733        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5734        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5735        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5736        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5737        communication if it is known that only local entries will be set.
5738 
5739 .keywords: matrix, aij, compressed row, sparse, parallel
5740 
5741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5742           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5743 @*/
5744 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5745 {
5746   PetscErrorCode ierr;
5747   Mat_MPIAIJ     *maij;
5748 
5749   PetscFunctionBegin;
5750   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5751   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5752   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5753   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5754   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5755   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5756   maij = (Mat_MPIAIJ*) (*mat)->data;
5757 
5758   (*mat)->preallocated = PETSC_TRUE;
5759 
5760   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5761   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5762 
5763   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5764   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5765 
5766   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5767   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5768   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5769   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770 
5771   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5774   PetscFunctionReturn(0);
5775 }
5776 
5777 /*
5778     Special version for direct calls from Fortran
5779 */
5780 #include <petsc-private/fortranimpl.h>
5781 
5782 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5783 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5784 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5785 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5786 #endif
5787 
5788 /* Change these macros so can be used in void function */
5789 #undef CHKERRQ
5790 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5791 #undef SETERRQ2
5792 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5793 #undef SETERRQ3
5794 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5795 #undef SETERRQ
5796 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5797 
5798 #undef __FUNCT__
5799 #define __FUNCT__ "matsetvaluesmpiaij_"
5800 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5801 {
5802   Mat            mat  = *mmat;
5803   PetscInt       m    = *mm, n = *mn;
5804   InsertMode     addv = *maddv;
5805   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5806   PetscScalar    value;
5807   PetscErrorCode ierr;
5808 
5809   MatCheckPreallocated(mat,1);
5810   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5811 
5812 #if defined(PETSC_USE_DEBUG)
5813   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5814 #endif
5815   {
5816     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5817     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5818     PetscBool roworiented = aij->roworiented;
5819 
5820     /* Some Variables required in the macro */
5821     Mat        A                 = aij->A;
5822     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5823     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5824     MatScalar  *aa               = a->a;
5825     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5826     Mat        B                 = aij->B;
5827     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5828     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5829     MatScalar  *ba               = b->a;
5830 
5831     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5832     PetscInt  nonew = a->nonew;
5833     MatScalar *ap1,*ap2;
5834 
5835     PetscFunctionBegin;
5836     for (i=0; i<m; i++) {
5837       if (im[i] < 0) continue;
5838 #if defined(PETSC_USE_DEBUG)
5839       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5840 #endif
5841       if (im[i] >= rstart && im[i] < rend) {
5842         row      = im[i] - rstart;
5843         lastcol1 = -1;
5844         rp1      = aj + ai[row];
5845         ap1      = aa + ai[row];
5846         rmax1    = aimax[row];
5847         nrow1    = ailen[row];
5848         low1     = 0;
5849         high1    = nrow1;
5850         lastcol2 = -1;
5851         rp2      = bj + bi[row];
5852         ap2      = ba + bi[row];
5853         rmax2    = bimax[row];
5854         nrow2    = bilen[row];
5855         low2     = 0;
5856         high2    = nrow2;
5857 
5858         for (j=0; j<n; j++) {
5859           if (roworiented) value = v[i*n+j];
5860           else value = v[i+j*m];
5861           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5862           if (in[j] >= cstart && in[j] < cend) {
5863             col = in[j] - cstart;
5864             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5865           } else if (in[j] < 0) continue;
5866 #if defined(PETSC_USE_DEBUG)
5867           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5868 #endif
5869           else {
5870             if (mat->was_assembled) {
5871               if (!aij->colmap) {
5872                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5873               }
5874 #if defined(PETSC_USE_CTABLE)
5875               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5876               col--;
5877 #else
5878               col = aij->colmap[in[j]] - 1;
5879 #endif
5880               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5881                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5882                 col  =  in[j];
5883                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5884                 B     = aij->B;
5885                 b     = (Mat_SeqAIJ*)B->data;
5886                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5887                 rp2   = bj + bi[row];
5888                 ap2   = ba + bi[row];
5889                 rmax2 = bimax[row];
5890                 nrow2 = bilen[row];
5891                 low2  = 0;
5892                 high2 = nrow2;
5893                 bm    = aij->B->rmap->n;
5894                 ba    = b->a;
5895               }
5896             } else col = in[j];
5897             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5898           }
5899         }
5900       } else if (!aij->donotstash) {
5901         if (roworiented) {
5902           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5903         } else {
5904           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5905         }
5906       }
5907     }
5908   }
5909   PetscFunctionReturnVoid();
5910 }
5911 
5912