xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e37c6257ed0f98c39d2d110f3ff733a26fbdb7cb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
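
/*
   A minimal usage sketch of the recommendation above. This fragment is illustrative only and is
   not part of this file's interface: comm, the local sizes m,n, the global sizes M,N, and the
   per-row nonzero estimates 5 and 2 are placeholders, and error checking is omitted.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,NULL);           used when comm has a single process
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    used when comm has multiple processes
*/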
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
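
/*
   A brief sketch of selecting this format at run time; the executable name ./app, comm, and the
   sizes are placeholders for illustration only.

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetFromOptions(A);

   run with:  ./app -mat_type aijcrl
*/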
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
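
    A calling sketch under the constraints above (gmat is a square MATSEQAIJ whose data is used
    only on rank 0, m is this rank's desired number of local rows; the name dist is illustrative):

       Mat dist;
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);   creates and fills dist
       ... change the numerical values (not the nonzero pattern) of gmat on rank 0 ...
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);     moves the new values into dist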
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine the diagonal and off-diagonal nonzero counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine the diagonal and off-diagonal nonzero counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0 */
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it, it is not scalable (each processor
356 has an order-N integer array, but access is fast).
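
  For illustration (values invented): if the off-diagonal block has three local columns and
  garray = {3, 7, 12}, the mapping answers global column 3 -> local 1, 7 -> 2, 12 -> 3; the local
  index is stored shifted up by one so that 0 can mean "this global column is not present here".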
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
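  /* v is assumed to hold the entire sparse row in increasing global column order: the off-diagonal
     entries left of the diagonal block, then the diagonal-block entries, then the off-diagonal
     entries right of the diagonal block, matching the three PetscMemcpy() calls below */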
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled; if so, we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if the nonzero structure of submatrix B cannot change, then we know that
701      no processor disassembled, and thus we can skip this step
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal, we use separate
871      code so that MatSetValues() is not called for each diagonal entry, which would
872      allocate new memory on every call, triggering many mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: the diagonal blocks must be transposes of each other */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
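/*
   Handles ASCII, draw, and socket viewing. The info formats print per-process
   nonzero statistics; binary output is delegated to MatView_MPIAIJ_Binary();
   for the remaining formats (plain ASCII, draw, socket) on more than one
   process the whole matrix is first assembled onto process 0 and viewed there
   through the singleton viewer, which is why matrices with more than 1024
   rows are refused unless -mat_ascii_output_large is set
*/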
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to participate in this call to draw the matrix since the
1569        graphics waits are synchronized across all processes that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
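/*
   Local SOR relaxation. Each outer iteration scatters the current solution
   into the ghost vector, folds the off-process coupling into the right-hand
   side (bb1 = bb - B*x), and then runs the sequential SOR kernel of the
   diagonal block. Only the SOR_LOCAL_* sweeps, SOR_APPLY_UPPER, and the
   Eisenstat variant are supported; a true parallel SOR is not
*/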
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
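  /* A work vector bb1 is needed unless a single local sweep with a zero initial
     guess (and no Eisenstat) is requested; ~flag & SOR_ZERO_INITIAL_GUESS is
     nonzero exactly when the zero-initial-guess bit is not set */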
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
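/*
   Applies the row and column permutations rowp and colp. Star forests built
   from the permutation indices are used to locate where each local row, local
   column, and ghost column ends up, the nonzero counts for the permuted rows
   are communicated for preallocation, and the entries are then inserted into
   the new matrix with MatSetValues()
*/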
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
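/*
   Gathers matrix statistics by summing the information of the two local
   blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the per-process totals are
   reduced over the matrix communicator
*/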
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
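/*
   Most options are simply forwarded to both local blocks; MAT_ROW_ORIENTED
   also records the assembly orientation, MAT_IGNORE_OFF_PROC_ENTRIES only
   disables the stash, MAT_SPD sets the symmetry flags of the parallel matrix,
   and the remaining symmetry options are forwarded to the diagonal block only
*/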
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
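/*
   Returns a locally owned row with its entries merged into increasing global
   column order: off-diagonal entries whose columns precede the diagonal block
   come first, then the diagonal-block entries, then the remaining
   off-diagonal entries. Work space sized for the longest local row is
   allocated on first use and kept in the matrix
*/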
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
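/*
   Computes the Frobenius norm (local sums of squares reduced over the
   communicator), the 1-norm (maximum column sum accumulated in a work array
   of global length), or the infinity norm (maximum local row sum combined
   with a max reduction); the 2-norm is not supported
*/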
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
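/*
   Forms the transpose by inserting each local row of the A and B blocks as a
   column of the result with MatSetValues(). For MAT_INITIAL_MATRIX the
   preallocation of the transpose is computed first, using a star forest to
   accumulate the off-diagonal column counts onto their owning processes
*/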
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
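/*
   Computes mat = diag(ll)*mat*diag(rr). The scatter of rr into the ghost
   vector is overlapped with the left scaling of the off-diagonal block and
   the scaling of the diagonal block; the off-diagonal block is right scaled
   once the scatter has completed
*/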
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
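/*
   Two parallel AIJ matrices are equal exactly when the corresponding local
   diagonal and off-diagonal blocks are equal on every process; the local
   results are combined with a logical-and reduction
*/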
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
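/*
   Computes Y = a*X + Y. With SAME_NONZERO_PATTERN this reduces to two BLAS
   axpy calls on the stored values; with SUBSET_NONZERO_PATTERN a cached
   X-to-Y index map is used for the off-diagonal block; otherwise a new matrix
   with the merged sparsity pattern is preallocated, filled, and swapped in
   with MatHeaderReplace()
*/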
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map.hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2452   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
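/*
   Frees the Mat_Redundant context (send/receive buffers and the
   subcommunicator) attached to a redundant matrix and then calls the destroy
   routine it replaced
*/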
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2530     ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2531     ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2532     for (i=0; i<redund->nrecvs; i++) {
2533       ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2534       ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2537 
2538     if (redund->psubcomm) {
2539       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2540     }
2541 
2542     ierr = redund->Destroy(A);CHKERRQ(ierr);
2543     ierr = PetscFree(redund);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
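/*
   Builds, on each subcommunicator, a redundant copy of the parallel matrix by
   packing the local rows in AIJ form and exchanging them among the processes
   of the subcommunicators; the communication pattern and buffers are cached
   in a Mat_Redundant context so that MAT_REUSE_MATRIX can refill the copy
   without recomputing them
*/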
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2551 {
2552   PetscMPIInt    rank,size;
2553   MPI_Comm       comm,subcomm=psubcomm->comm;
2554   PetscErrorCode ierr;
2555   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2556   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2557   PetscInt       *rowrange = mat->rmap->range;
2558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2559   Mat            A = aij->A,B=aij->B,C=*matredundant;
2560   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2561   PetscScalar    *sbuf_a;
2562   PetscInt       nzlocal=a->nz+b->nz;
2563   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2564   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2565   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2566   MatScalar      *aworkA,*aworkB;
2567   PetscScalar    *vals;
2568   PetscMPIInt    tag1,tag2,tag3,imdex;
2569   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2570   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2571   MPI_Status     recv_status,*send_status;
2572   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2573   PetscInt       **rbuf_j=NULL;
2574   PetscScalar    **rbuf_a=NULL;
2575   Mat_Redundant  *redund =NULL;
2576   PetscInt       rstart_sub,rend_sub,mloc_sub;
2577 
2578   PetscFunctionBegin;
2579   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2580   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2582   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2583   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2584 
2585   if (reuse == MAT_REUSE_MATRIX) {
2586     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2587     if (subsize == 1) {
2588       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2589       redund = c->redundant;
2590     } else {
2591       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2592       redund = c->redundant;
2593     }
2594     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2595 
2596     nsends    = redund->nsends;
2597     nrecvs    = redund->nrecvs;
2598     send_rank = redund->send_rank;
2599     recv_rank = redund->recv_rank;
2600     sbuf_nz   = redund->sbuf_nz;
2601     rbuf_nz   = redund->rbuf_nz;
2602     sbuf_j    = redund->sbuf_j;
2603     sbuf_a    = redund->sbuf_a;
2604     rbuf_j    = redund->rbuf_j;
2605     rbuf_a    = redund->rbuf_a;
2606   }
2607 
2608   if (reuse == MAT_INITIAL_MATRIX) {
2609     PetscInt    nleftover,np_subcomm;
2610 
2611     /* get local size of redundant matrix */
2612     const PetscInt *range;
2613     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2614     rstart_sub = range[nsubcomm*subrank];
2615     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2616       rend_sub = range[nsubcomm*(subrank+1)];
2617     } else {
2618       rend_sub = mat->rmap->N;
2619     }
2620     mloc_sub = rend_sub - rstart_sub;
2621 
2622     /* get the destination processors' id send_rank, nsends and nrecvs */
2623     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2624 
2625     np_subcomm = size/nsubcomm;
2626     nleftover  = size - nsubcomm*np_subcomm;
2627 
2628     nsends = 0; nrecvs = 0;
2629     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2630       /* -------------------------------------------*/
2631       for (i=0; i<size; i++) {
2632         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2633           send_rank[nsends]   = i; nsends++;
2634           recv_rank[nrecvs++] = i;
2635           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2636         }
2637       }
2638       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2639         i = size-nleftover-1;
2640         j = 0;
2641         while (j < nsubcomm - nleftover) {
2642           send_rank[nsends++] = i;
2643           i--; j++;
2644           /* printf("[%d] send to [%d]\n",rank,i); */
2645         }
2646       }
2647 
2648       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2649         for (i=0; i<nleftover; i++) {
2650           recv_rank[nrecvs++] = size-nleftover+i;
2651           /* printf("[%d] recv from [%d]\n",rank,i); */
2652         }
2653       }
2654     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2655       /* --------------------------------------------------*/
2656       PetscInt color,subcommstart;
2657       subcommstart=0;
2658       for (color=0; color<nsubcomm; color++) {
2659         if (psubcomm->color != color) {
2660           for (i=0; i<psubcomm->subsize[color]; i++) {
2661             if (subrank == i) { /* my_subrank == other's subrank */
2662               send_rank[nsends++] = subcommstart+i;
2663               recv_rank[nrecvs++] = subcommstart+i;
2664               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2665             }
2666           }
2667         }
2668         subcommstart += psubcomm->subsize[color];
2669       }
2670       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2671         subcommstart=0;
2672         for (color=0; color<nsubcomm; color++) {
2673           subcommstart += psubcomm->subsize[color];
2674           if (psubcomm->color == color) continue;
2675           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2676             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2677             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2678           }
2679         }
2680       }
2681 
2682       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2683         subcommstart=0;
2684         for (color=0; color<nsubcomm; color++) {
2685           subcommstart += psubcomm->subsize[color];
2686           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2687             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2688             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2689           }
2690         }
2691       }
2692     } /* endof (psubcomm->type == PETSC_SUBCOMM_INTERLACED) */
2693 
2694     /* allocate sbuf_j, sbuf_a */
2695     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2696     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2697     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2698     /*
2699     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2700     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2701      */
2702   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2703 
2704   /* copy mat's local entries into the buffers */
2705   if (reuse == MAT_INITIAL_MATRIX) {
2706     rownz_max = 0;
2707     rptr      = sbuf_j;
2708     cols      = sbuf_j + rend-rstart + 1;
2709     vals      = sbuf_a;
2710     rptr[0]   = 0;
2711     for (i=0; i<rend-rstart; i++) {
2712       row    = i + rstart;
2713       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2714       ncols  = nzA + nzB;
2715       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2716       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2717       /* load the column indices for this row into cols */
2718       lwrite = 0;
2719       for (l=0; l<nzB; l++) {
2720         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2721           vals[lwrite]   = aworkB[l];
2722           cols[lwrite++] = ctmp;
2723         }
2724       }
2725       for (l=0; l<nzA; l++) {
2726         vals[lwrite]   = aworkA[l];
2727         cols[lwrite++] = cstart + cworkA[l];
2728       }
2729       for (l=0; l<nzB; l++) {
2730         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2731           vals[lwrite]   = aworkB[l];
2732           cols[lwrite++] = ctmp;
2733         }
2734       }
2735       vals     += ncols;
2736       cols     += ncols;
2737       rptr[i+1] = rptr[i] + ncols;
2738       if (rownz_max < ncols) rownz_max = ncols;
2739     }
2740     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2741   } else { /* only copy matrix values into sbuf_a */
2742     rptr    = sbuf_j;
2743     vals    = sbuf_a;
2744     rptr[0] = 0;
2745     for (i=0; i<rend-rstart; i++) {
2746       row    = i + rstart;
2747       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2748       ncols  = nzA + nzB;
2749       cworkB = b->j + b->i[i];
2750       aworkA = a->a + a->i[i];
2751       aworkB = b->a + b->i[i];
2752       lwrite = 0;
2753       for (l=0; l<nzB; l++) {
2754         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2755       }
2756       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2757       for (l=0; l<nzB; l++) {
2758         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2759       }
2760       vals     += ncols;
2761       rptr[i+1] = rptr[i] + ncols;
2762     }
2763   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2764 
2765   /* send nzlocal to others, and recv other's nzlocal */
2766   /*--------------------------------------------------*/
2767   if (reuse == MAT_INITIAL_MATRIX) {
2768     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2769 
2770     s_waits2 = s_waits3 + nsends;
2771     s_waits1 = s_waits2 + nsends;
2772     r_waits1 = s_waits1 + nsends;
2773     r_waits2 = r_waits1 + nrecvs;
2774     r_waits3 = r_waits2 + nrecvs;
2775   } else {
2776     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2777 
2778     r_waits3 = s_waits3 + nsends;
2779   }
2780 
2781   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2782   if (reuse == MAT_INITIAL_MATRIX) {
2783     /* get new tags to keep the communication clean */
2784     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2785     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2786     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2787 
2788     /* post receives of other's nzlocal */
2789     for (i=0; i<nrecvs; i++) {
2790       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2791     }
2792     /* send nzlocal to others */
2793     for (i=0; i<nsends; i++) {
2794       sbuf_nz[i] = nzlocal;
2795       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2796     }
2797     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2798     count = nrecvs;
2799     while (count) {
2800       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2801 
2802       recv_rank[imdex] = recv_status.MPI_SOURCE;
2803       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2804       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2805 
2806       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2807 
2808       rbuf_nz[imdex] += i + 2;
2809 
2810       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2811       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2812       count--;
2813     }
2814     /* wait on sends of nzlocal */
2815     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2816     /* send mat->i,j to others, and recv from others */
2817     /*------------------------------------------------*/
2818     for (i=0; i<nsends; i++) {
2819       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2820       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2821     }
2822     /* wait on receives of mat->i,j */
2823     /*------------------------------*/
2824     count = nrecvs;
2825     while (count) {
2826       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2827       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2828       count--;
2829     }
2830     /* wait on sends of mat->i,j */
2831     /*---------------------------*/
2832     if (nsends) {
2833       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2834     }
2835   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2836 
2837   /* post receives, send and receive mat->a */
2838   /*----------------------------------------*/
2839   for (imdex=0; imdex<nrecvs; imdex++) {
2840     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2841   }
2842   for (i=0; i<nsends; i++) {
2843     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2844   }
2845   count = nrecvs;
2846   while (count) {
2847     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2848     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2849     count--;
2850   }
2851   if (nsends) {
2852     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2853   }
2854 
2855   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2856 
2857   /* create redundant matrix */
2858   /*-------------------------*/
2859   if (reuse == MAT_INITIAL_MATRIX) {
2860     /* compute rownz_max for preallocation */
2861     for (imdex=0; imdex<nrecvs; imdex++) {
2862       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2863       rptr = rbuf_j[imdex];
2864       for (i=0; i<j; i++) {
2865         ncols = rptr[i+1] - rptr[i];
2866         if (rownz_max < ncols) rownz_max = ncols;
2867       }
2868     }
2869 
2870     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2871     if (M == N) {
2872       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2873     } else { /* non-square matrix */
2874       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2875     }
2876     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2877     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2878     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2879     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2880   } else {
2881     C = *matredundant;
2882   }
2883 
2884   /* insert local matrix entries */
2885   rptr = sbuf_j;
2886   cols = sbuf_j + rend-rstart + 1;
2887   vals = sbuf_a;
2888   for (i=0; i<rend-rstart; i++) {
2889     row   = i + rstart;
2890     ncols = rptr[i+1] - rptr[i];
2891     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2892     vals += ncols;
2893     cols += ncols;
2894   }
2895   /* insert received matrix entries */
2896   for (imdex=0; imdex<nrecvs; imdex++) {
2897     rstart = rowrange[recv_rank[imdex]];
2898     rend   = rowrange[recv_rank[imdex]+1];
2899     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2900     rptr   = rbuf_j[imdex];
2901     cols   = rbuf_j[imdex] + rend-rstart + 1;
2902     vals   = rbuf_a[imdex];
2903     for (i=0; i<rend-rstart; i++) {
2904       row   = i + rstart;
2905       ncols = rptr[i+1] - rptr[i];
2906       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2907       vals += ncols;
2908       cols += ncols;
2909     }
2910   }
2911   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2912   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2913 
2914   if (reuse == MAT_INITIAL_MATRIX) {
2915     *matredundant = C;
2916 
2917     /* create a supporting struct and attach it to C for reuse */
2918     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2919     if (subsize == 1) {
2920       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2921       c->redundant = redund;
2922     } else {
2923       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2924       c->redundant = redund;
2925     }
2926 
2927     redund->nzlocal   = nzlocal;
2928     redund->nsends    = nsends;
2929     redund->nrecvs    = nrecvs;
2930     redund->send_rank = send_rank;
2931     redund->recv_rank = recv_rank;
2932     redund->sbuf_nz   = sbuf_nz;
2933     redund->rbuf_nz   = rbuf_nz;
2934     redund->sbuf_j    = sbuf_j;
2935     redund->sbuf_a    = sbuf_a;
2936     redund->rbuf_j    = rbuf_j;
2937     redund->rbuf_a    = rbuf_a;
2938     redund->psubcomm  = NULL;
2939 
2940     redund->Destroy = C->ops->destroy;
2941     C->ops->destroy = MatDestroy_MatRedundant;
2942   }
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2948 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2949 {
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   /* Only MatGetRedundantMatrix_MPIAIJ_psubcomm() is implemented, so create a PetscSubcomm here when none is supplied */
2954   if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */
2955     MPI_Comm       comm;
2956     PetscSubcomm   psubcomm;
2957     PetscMPIInt    size,subsize;
2958     PetscInt       type=2;
2959     PetscBool      flg=PETSC_FALSE;
2960     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2961     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2962     ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2963     ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2964 
2965     ierr = PetscOptionsGetInt(NULL,"-subcomm_type",&type,NULL);CHKERRQ(ierr);
2966     if (type == 2) {
2967       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr);
2968     } else {
2969       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2970     }
2971     ierr = PetscOptionsHasName(NULL, "-psubcomm_view", &flg);CHKERRQ(ierr);
2972     if (flg) {
2973       ierr = PetscSubcommView(psubcomm,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2974     }
2975 
2976     ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2977 
2978     /* free psubcomm in MatDestroy_MatRedundant() */
2979     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2980     Mat C = *matredundant;
2981     if (subsize == 1) {
2982       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2983       c->redundant->psubcomm = psubcomm;
2984     } else {
2985       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2986       c->redundant->psubcomm = psubcomm;
2987     }
2988   } else {
2989     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet");
2990   }
2991   PetscFunctionReturn(0);
2992 }
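
/*
   Usage sketch (not part of the original source): how a caller might obtain
   redundant copies of a parallel matrix through the MatGetRedundantMatrix()
   interface that dispatches to the routine above; the public wrapper is
   assumed to take the same argument list as MatGetRedundantMatrix_MPIAIJ().
   The matrix A and nsubcomm = 2 are illustrative.

     Mat      A,Ared;
     PetscInt nsubcomm = 2;                      // number of redundant copies
     // MPI_COMM_NULL lets the routine above build a PetscSubcomm internally
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     // ... use Ared on its subcommunicator, e.g. for a redundant direct solve ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);
*/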
2993 
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2996 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2997 {
2998   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2999   PetscErrorCode ierr;
3000   PetscInt       i,*idxb = 0;
3001   PetscScalar    *va,*vb;
3002   Vec            vtmp;
3003 
3004   PetscFunctionBegin;
3005   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3006   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3007   if (idx) {
3008     for (i=0; i<A->rmap->n; i++) {
3009       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3010     }
3011   }
3012 
3013   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3014   if (idx) {
3015     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3016   }
3017   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3018   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3019 
3020   for (i=0; i<A->rmap->n; i++) {
3021     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3022       va[i] = vb[i];
3023       if (idx) idx[i] = a->garray[idxb[i]];
3024     }
3025   }
3026 
3027   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3028   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3029   ierr = PetscFree(idxb);CHKERRQ(ierr);
3030   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3031   PetscFunctionReturn(0);
3032 }
3033 
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3036 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3037 {
3038   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3039   PetscErrorCode ierr;
3040   PetscInt       i,*idxb = 0;
3041   PetscScalar    *va,*vb;
3042   Vec            vtmp;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3046   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3047   if (idx) {
3048     for (i=0; i<A->rmap->n; i++) {
3049       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3050     }
3051   }
3052 
3053   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3054   if (idx) {
3055     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3056   }
3057   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3058   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3059 
3060   for (i=0; i<A->rmap->n; i++) {
3061     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3062       va[i] = vb[i];
3063       if (idx) idx[i] = a->garray[idxb[i]];
3064     }
3065   }
3066 
3067   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3068   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3069   ierr = PetscFree(idxb);CHKERRQ(ierr);
3070   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3076 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3077 {
3078   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3079   PetscInt       n      = A->rmap->n;
3080   PetscInt       cstart = A->cmap->rstart;
3081   PetscInt       *cmap  = mat->garray;
3082   PetscInt       *diagIdx, *offdiagIdx;
3083   Vec            diagV, offdiagV;
3084   PetscScalar    *a, *diagA, *offdiagA;
3085   PetscInt       r;
3086   PetscErrorCode ierr;
3087 
3088   PetscFunctionBegin;
3089   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3090   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3091   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3092   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3093   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3094   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3095   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3096   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3097   for (r = 0; r < n; ++r) {
3098     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3099       a[r]   = diagA[r];
3100       idx[r] = cstart + diagIdx[r];
3101     } else {
3102       a[r]   = offdiagA[r];
3103       idx[r] = cmap[offdiagIdx[r]];
3104     }
3105   }
3106   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3107   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3108   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3109   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3110   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3111   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3117 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3118 {
3119   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3120   PetscInt       n      = A->rmap->n;
3121   PetscInt       cstart = A->cmap->rstart;
3122   PetscInt       *cmap  = mat->garray;
3123   PetscInt       *diagIdx, *offdiagIdx;
3124   Vec            diagV, offdiagV;
3125   PetscScalar    *a, *diagA, *offdiagA;
3126   PetscInt       r;
3127   PetscErrorCode ierr;
3128 
3129   PetscFunctionBegin;
3130   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3131   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3132   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3133   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3134   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3135   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3136   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3137   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3138   for (r = 0; r < n; ++r) {
3139     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3140       a[r]   = diagA[r];
3141       idx[r] = cstart + diagIdx[r];
3142     } else {
3143       a[r]   = offdiagA[r];
3144       idx[r] = cmap[offdiagIdx[r]];
3145     }
3146   }
3147   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3148   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3149   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3150   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3151   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3152   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
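
/*
   Usage sketch (not part of the original source): computing per-row maxima of
   an assembled MPIAIJ matrix through the public MatGetRowMaxAbs(), which ends
   up in MatGetRowMaxAbs_MPIAIJ() above.  The matrix A is assumed to be
   already created and assembled.

     Vec      rowmax;
     PetscInt m,*idx;
     ierr = MatGetVecs(A,NULL,&rowmax);CHKERRQ(ierr);      // vector with the row layout of A
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,idx);CHKERRQ(ierr);   // idx[i] = global column of the largest |entry| in local row i
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/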
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3158 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3159 {
3160   PetscErrorCode ierr;
3161   Mat            *dummy;
3162 
3163   PetscFunctionBegin;
3164   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3165   *newmat = *dummy;
3166   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3167   PetscFunctionReturn(0);
3168 }
3169 
3170 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3174 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3175 {
3176   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3177   PetscErrorCode ierr;
3178 
3179   PetscFunctionBegin;
3180   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3186 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3187 {
3188   PetscErrorCode ierr;
3189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3190 
3191   PetscFunctionBegin;
3192   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3193   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3194   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
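
/*
   Usage sketch (not part of the original source): filling an existing MPIAIJ
   matrix with random values through the public MatSetRandom(), which calls
   MatSetRandom_MPIAIJ() above.  The matrix A is assumed to be already
   created and preallocated.

     PetscRandom rctx;
     ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/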
3198 
3199 /* -------------------------------------------------------------------*/
3200 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3201                                        MatGetRow_MPIAIJ,
3202                                        MatRestoreRow_MPIAIJ,
3203                                        MatMult_MPIAIJ,
3204                                 /* 4*/ MatMultAdd_MPIAIJ,
3205                                        MatMultTranspose_MPIAIJ,
3206                                        MatMultTransposeAdd_MPIAIJ,
3207 #if defined(PETSC_HAVE_PBGL)
3208                                        MatSolve_MPIAIJ,
3209 #else
3210                                        0,
3211 #endif
3212                                        0,
3213                                        0,
3214                                 /*10*/ 0,
3215                                        0,
3216                                        0,
3217                                        MatSOR_MPIAIJ,
3218                                        MatTranspose_MPIAIJ,
3219                                 /*15*/ MatGetInfo_MPIAIJ,
3220                                        MatEqual_MPIAIJ,
3221                                        MatGetDiagonal_MPIAIJ,
3222                                        MatDiagonalScale_MPIAIJ,
3223                                        MatNorm_MPIAIJ,
3224                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3225                                        MatAssemblyEnd_MPIAIJ,
3226                                        MatSetOption_MPIAIJ,
3227                                        MatZeroEntries_MPIAIJ,
3228                                 /*24*/ MatZeroRows_MPIAIJ,
3229                                        0,
3230 #if defined(PETSC_HAVE_PBGL)
3231                                        0,
3232 #else
3233                                        0,
3234 #endif
3235                                        0,
3236                                        0,
3237                                 /*29*/ MatSetUp_MPIAIJ,
3238 #if defined(PETSC_HAVE_PBGL)
3239                                        0,
3240 #else
3241                                        0,
3242 #endif
3243                                        0,
3244                                        0,
3245                                        0,
3246                                 /*34*/ MatDuplicate_MPIAIJ,
3247                                        0,
3248                                        0,
3249                                        0,
3250                                        0,
3251                                 /*39*/ MatAXPY_MPIAIJ,
3252                                        MatGetSubMatrices_MPIAIJ,
3253                                        MatIncreaseOverlap_MPIAIJ,
3254                                        MatGetValues_MPIAIJ,
3255                                        MatCopy_MPIAIJ,
3256                                 /*44*/ MatGetRowMax_MPIAIJ,
3257                                        MatScale_MPIAIJ,
3258                                        0,
3259                                        0,
3260                                        MatZeroRowsColumns_MPIAIJ,
3261                                 /*49*/ MatSetRandom_MPIAIJ,
3262                                        0,
3263                                        0,
3264                                        0,
3265                                        0,
3266                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3267                                        0,
3268                                        MatSetUnfactored_MPIAIJ,
3269                                        MatPermute_MPIAIJ,
3270                                        0,
3271                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3272                                        MatDestroy_MPIAIJ,
3273                                        MatView_MPIAIJ,
3274                                        0,
3275                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3276                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3277                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3278                                        0,
3279                                        0,
3280                                        0,
3281                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3282                                        MatGetRowMinAbs_MPIAIJ,
3283                                        0,
3284                                        MatSetColoring_MPIAIJ,
3285                                        0,
3286                                        MatSetValuesAdifor_MPIAIJ,
3287                                 /*75*/ MatFDColoringApply_AIJ,
3288                                        0,
3289                                        0,
3290                                        0,
3291                                        MatFindZeroDiagonals_MPIAIJ,
3292                                 /*80*/ 0,
3293                                        0,
3294                                        0,
3295                                 /*83*/ MatLoad_MPIAIJ,
3296                                        0,
3297                                        0,
3298                                        0,
3299                                        0,
3300                                        0,
3301                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3302                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3303                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3304                                        MatPtAP_MPIAIJ_MPIAIJ,
3305                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3306                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3307                                        0,
3308                                        0,
3309                                        0,
3310                                        0,
3311                                 /*99*/ 0,
3312                                        0,
3313                                        0,
3314                                        MatConjugate_MPIAIJ,
3315                                        0,
3316                                 /*104*/MatSetValuesRow_MPIAIJ,
3317                                        MatRealPart_MPIAIJ,
3318                                        MatImaginaryPart_MPIAIJ,
3319                                        0,
3320                                        0,
3321                                 /*109*/0,
3322                                        MatGetRedundantMatrix_MPIAIJ,
3323                                        MatGetRowMin_MPIAIJ,
3324                                        0,
3325                                        0,
3326                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3327                                        0,
3328                                        0,
3329                                        0,
3330                                        0,
3331                                 /*119*/0,
3332                                        0,
3333                                        0,
3334                                        0,
3335                                        MatGetMultiProcBlock_MPIAIJ,
3336                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3337                                        MatGetColumnNorms_MPIAIJ,
3338                                        MatInvertBlockDiagonal_MPIAIJ,
3339                                        0,
3340                                        MatGetSubMatricesParallel_MPIAIJ,
3341                                 /*129*/0,
3342                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3343                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3344                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3345                                        0,
3346                                 /*134*/0,
3347                                        0,
3348                                        0,
3349                                        0,
3350                                        0,
3351                                 /*139*/0,
3352                                        0
3353 };
3354 
3355 /* ----------------------------------------------------------------------------------------*/
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3359 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3360 {
3361   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3362   PetscErrorCode ierr;
3363 
3364   PetscFunctionBegin;
3365   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3366   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3367   PetscFunctionReturn(0);
3368 }
3369 
3370 #undef __FUNCT__
3371 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3372 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3373 {
3374   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3375   PetscErrorCode ierr;
3376 
3377   PetscFunctionBegin;
3378   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3379   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3380   PetscFunctionReturn(0);
3381 }
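
/*
   Usage sketch (not part of the original source): the store/retrieve pattern
   the two routines above support, useful when a matrix with a fixed nonzero
   structure is refilled repeatedly.  The matrix A is assumed to be already
   preallocated and assembled.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);        // snapshot the current numerical values
     ierr = MatZeroEntries(A);CHKERRQ(ierr);
     // ... insert new contributions with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);     // restore the snapshot when needed
*/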
3382 
3383 #undef __FUNCT__
3384 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3385 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3386 {
3387   Mat_MPIAIJ     *b;
3388   PetscErrorCode ierr;
3389 
3390   PetscFunctionBegin;
3391   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3392   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3393   b = (Mat_MPIAIJ*)B->data;
3394 
3395   if (!B->preallocated) {
3396     /* Explicitly create 2 MATSEQAIJ matrices. */
3397     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3398     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3399     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3400     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3401     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3402     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3403     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3404     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3405     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3406     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3407   }
3408 
3409   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3410   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3411   B->preallocated = PETSC_TRUE;
3412   PetscFunctionReturn(0);
3413 }
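
/*
   Usage sketch (not part of the original source): typical creation and
   preallocation of an MPIAIJ matrix through the public interface that lands
   in MatMPIAIJSetPreallocation_MPIAIJ() above.  The global size of 100 and
   the estimate of 5 nonzeros per row in each of the diagonal and
   off-diagonal blocks are illustrative assumptions.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);   // d_nz = o_nz = 5, no per-row arrays
     // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() as usual ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/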
3414 
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3417 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3418 {
3419   Mat            mat;
3420   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3421   PetscErrorCode ierr;
3422 
3423   PetscFunctionBegin;
3424   *newmat = 0;
3425   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3426   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3427   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3428   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3429   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3430   a       = (Mat_MPIAIJ*)mat->data;
3431 
3432   mat->factortype   = matin->factortype;
3433   mat->rmap->bs     = matin->rmap->bs;
3434   mat->cmap->bs     = matin->cmap->bs;
3435   mat->assembled    = PETSC_TRUE;
3436   mat->insertmode   = NOT_SET_VALUES;
3437   mat->preallocated = PETSC_TRUE;
3438 
3439   a->size         = oldmat->size;
3440   a->rank         = oldmat->rank;
3441   a->donotstash   = oldmat->donotstash;
3442   a->roworiented  = oldmat->roworiented;
3443   a->rowindices   = 0;
3444   a->rowvalues    = 0;
3445   a->getrowactive = PETSC_FALSE;
3446 
3447   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3448   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3449 
3450   if (oldmat->colmap) {
3451 #if defined(PETSC_USE_CTABLE)
3452     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3453 #else
3454     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3455     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3456     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3457 #endif
3458   } else a->colmap = 0;
3459   if (oldmat->garray) {
3460     PetscInt len;
3461     len  = oldmat->B->cmap->n;
3462     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3463     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3464     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3465   } else a->garray = 0;
3466 
3467   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3468   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3469   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3470   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3471   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3472   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3473   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3474   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3475   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3476   *newmat = mat;
3477   PetscFunctionReturn(0);
3478 }
3479 
3480 
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatLoad_MPIAIJ"
3484 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3485 {
3486   PetscScalar    *vals,*svals;
3487   MPI_Comm       comm;
3488   PetscErrorCode ierr;
3489   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3490   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3491   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3492   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3493   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3494   int            fd;
3495   PetscInt       bs = 1;
3496 
3497   PetscFunctionBegin;
3498   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3499   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3501   if (!rank) {
3502     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3503     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3504     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3505   }
3506 
3507   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3508   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3509   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3510 
3511   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3512 
3513   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3514   M    = header[1]; N = header[2];
3515   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3516   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3517   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3518 
3519   /* If global sizes are set, check if they are consistent with that given in the file */
3520   if (sizesset) {
3521     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3522   }
3523   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3524   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3525 
3526   /* determine ownership of all (block) rows */
3527   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3528   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3529   else m = newMat->rmap->n; /* Set by user */
3530 
3531   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3532   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3533 
3534   /* First process needs enough room for process with most rows */
3535   if (!rank) {
3536     mmax = rowners[1];
3537     for (i=2; i<=size; i++) {
3538       mmax = PetscMax(mmax, rowners[i]);
3539     }
3540   } else mmax = -1;             /* unused, but compilers complain */
3541 
3542   rowners[0] = 0;
3543   for (i=2; i<=size; i++) {
3544     rowners[i] += rowners[i-1];
3545   }
3546   rstart = rowners[rank];
3547   rend   = rowners[rank+1];
3548 
3549   /* distribute row lengths to all processors */
3550   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3551   if (!rank) {
3552     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3553     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3554     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3555     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3556     for (j=0; j<m; j++) {
3557       procsnz[0] += ourlens[j];
3558     }
3559     for (i=1; i<size; i++) {
3560       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3561       /* calculate the number of nonzeros on each processor */
3562       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3563         procsnz[i] += rowlengths[j];
3564       }
3565       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3566     }
3567     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3568   } else {
3569     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3570   }
3571 
3572   if (!rank) {
3573     /* determine max buffer needed and allocate it */
3574     maxnz = 0;
3575     for (i=0; i<size; i++) {
3576       maxnz = PetscMax(maxnz,procsnz[i]);
3577     }
3578     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3579 
3580     /* read in my part of the matrix column indices  */
3581     nz   = procsnz[0];
3582     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3583     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3584 
3585     /* read in everyone else's and ship off */
3586     for (i=1; i<size; i++) {
3587       nz   = procsnz[i];
3588       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3589       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3590     }
3591     ierr = PetscFree(cols);CHKERRQ(ierr);
3592   } else {
3593     /* determine buffer space needed for message */
3594     nz = 0;
3595     for (i=0; i<m; i++) {
3596       nz += ourlens[i];
3597     }
3598     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3599 
3600     /* receive message of column indices*/
3601     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3602   }
3603 
3604   /* determine column ownership if matrix is not square */
3605   if (N != M) {
3606     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3607     else n = newMat->cmap->n;
3608     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3609     cstart = cend - n;
3610   } else {
3611     cstart = rstart;
3612     cend   = rend;
3613     n      = cend - cstart;
3614   }
3615 
3616   /* loop over local rows, determining number of off diagonal entries */
3617   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3618   jj   = 0;
3619   for (i=0; i<m; i++) {
3620     for (j=0; j<ourlens[i]; j++) {
3621       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3622       jj++;
3623     }
3624   }
3625 
3626   for (i=0; i<m; i++) {
3627     ourlens[i] -= offlens[i];
3628   }
3629   if (!sizesset) {
3630     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3631   }
3632 
3633   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3634 
3635   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3636 
3637   for (i=0; i<m; i++) {
3638     ourlens[i] += offlens[i];
3639   }
3640 
3641   if (!rank) {
3642     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3643 
3644     /* read in my part of the matrix numerical values  */
3645     nz   = procsnz[0];
3646     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3647 
3648     /* insert into matrix */
3649     jj      = rstart;
3650     smycols = mycols;
3651     svals   = vals;
3652     for (i=0; i<m; i++) {
3653       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3654       smycols += ourlens[i];
3655       svals   += ourlens[i];
3656       jj++;
3657     }
3658 
3659     /* read in other processors and ship out */
3660     for (i=1; i<size; i++) {
3661       nz   = procsnz[i];
3662       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3663       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3664     }
3665     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3666   } else {
3667     /* receive numeric values */
3668     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3669 
3670     /* receive message of values*/
3671     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3672 
3673     /* insert into matrix */
3674     jj      = rstart;
3675     smycols = mycols;
3676     svals   = vals;
3677     for (i=0; i<m; i++) {
3678       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3679       smycols += ourlens[i];
3680       svals   += ourlens[i];
3681       jj++;
3682     }
3683   }
3684   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3685   ierr = PetscFree(vals);CHKERRQ(ierr);
3686   ierr = PetscFree(mycols);CHKERRQ(ierr);
3687   ierr = PetscFree(rowners);CHKERRQ(ierr);
3688   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
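
/*
   Usage sketch (not part of the original source): loading an MPIAIJ matrix
   from a PETSc binary file, which drives MatLoad_MPIAIJ() above.  The file
   name "matrix.dat" is an illustrative assumption.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);          // sizes come from the file unless set beforehand
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/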
3692 
3693 #undef __FUNCT__
3694 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3695 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3696 {
3697   PetscErrorCode ierr;
3698   IS             iscol_local;
3699   PetscInt       csize;
3700 
3701   PetscFunctionBegin;
3702   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3703   if (call == MAT_REUSE_MATRIX) {
3704     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3705     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3706   } else {
3707     PetscInt cbs;
3708     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3709     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3710     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3711   }
3712   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3713   if (call == MAT_INITIAL_MATRIX) {
3714     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3715     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3716   }
3717   PetscFunctionReturn(0);
3718 }
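
/*
   Usage sketch (not part of the original source): extracting a parallel
   submatrix through the public MatGetSubMatrix(), which calls
   MatGetSubMatrix_MPIAIJ() above.  Here each process keeps at most its first
   ten local rows and (collectively) all columns; the matrix A and the count
   of 10 are illustrative assumptions.

     Mat      A,Asub;
     IS       isrow,iscol;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,PetscMin(10,rend-rstart),rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);  // union over processes is all columns
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&Asub);CHKERRQ(ierr);
*/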
3719 
3720 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3721 #undef __FUNCT__
3722 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3723 /*
3724     Not great since it makes two copies of the submatrix: first a SeqAIJ
3725   on each process, and then the local matrices are concatenated into the end result.
3726   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3727 
3728   Note: This requires a sequential iscol with all indices.
3729 */
3730 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3731 {
3732   PetscErrorCode ierr;
3733   PetscMPIInt    rank,size;
3734   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3735   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3736   PetscBool      allcolumns, colflag;
3737   Mat            M,Mreuse;
3738   MatScalar      *vwork,*aa;
3739   MPI_Comm       comm;
3740   Mat_SeqAIJ     *aij;
3741 
3742   PetscFunctionBegin;
3743   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3744   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3745   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3746 
3747   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3748   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3749   if (colflag && ncol == mat->cmap->N) {
3750     allcolumns = PETSC_TRUE;
3751   } else {
3752     allcolumns = PETSC_FALSE;
3753   }
3754   if (call ==  MAT_REUSE_MATRIX) {
3755     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3756     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3757     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3758   } else {
3759     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3760   }
3761 
3762   /*
3763       m - number of local rows
3764       n - number of columns (same on all processors)
3765       rstart - first row in new global matrix generated
3766   */
3767   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3768   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3769   if (call == MAT_INITIAL_MATRIX) {
3770     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3771     ii  = aij->i;
3772     jj  = aij->j;
3773 
3774     /*
3775         Determine the number of non-zeros in the diagonal and off-diagonal
3776         portions of the matrix in order to do correct preallocation
3777     */
3778 
3779     /* first get start and end of "diagonal" columns */
3780     if (csize == PETSC_DECIDE) {
3781       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3782       if (mglobal == n) { /* square matrix */
3783         nlocal = m;
3784       } else {
3785         nlocal = n/size + ((n % size) > rank);
3786       }
3787     } else {
3788       nlocal = csize;
3789     }
3790     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3791     rstart = rend - nlocal;
3792     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3793 
3794     /* next, compute all the lengths */
3795     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3796     olens = dlens + m;
3797     for (i=0; i<m; i++) {
3798       jend = ii[i+1] - ii[i];
3799       olen = 0;
3800       dlen = 0;
3801       for (j=0; j<jend; j++) {
3802         if (*jj < rstart || *jj >= rend) olen++;
3803         else dlen++;
3804         jj++;
3805       }
3806       olens[i] = olen;
3807       dlens[i] = dlen;
3808     }
3809     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3810     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3811     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3812     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3813     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3814     ierr = PetscFree(dlens);CHKERRQ(ierr);
3815   } else {
3816     PetscInt ml,nl;
3817 
3818     M    = *newmat;
3819     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3820     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3821     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3822     /*
3823          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3824        rather than the slower MatSetValues().
3825     */
3826     M->was_assembled = PETSC_TRUE;
3827     M->assembled     = PETSC_FALSE;
3828   }
3829   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3830   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3831   ii   = aij->i;
3832   jj   = aij->j;
3833   aa   = aij->a;
3834   for (i=0; i<m; i++) {
3835     row   = rstart + i;
3836     nz    = ii[i+1] - ii[i];
3837     cwork = jj;     jj += nz;
3838     vwork = aa;     aa += nz;
3839     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3840   }
3841 
3842   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844   *newmat = M;
3845 
3846   /* save submatrix used in processor for next request */
3847   if (call ==  MAT_INITIAL_MATRIX) {
3848     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3849     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3850   }
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3856 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart, cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscScalar    *values;
3862   PetscErrorCode ierr;
3863 
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3866 
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873 
3874   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3875 
3876 #if defined(PETSC_USE_DEBUG)
3877   for (i=0; i<m; i++) {
3878     nnz = Ii[i+1]- Ii[i];
3879     JJ  = J + Ii[i];
3880     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3881     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3882     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3883   }
3884 #endif
3885 
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899 
3900   if (v) values = (PetscScalar*)v;
3901   else {
3902     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3903     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3904   }
3905 
3906   for (i=0; i<m; i++) {
3907     ii   = i + rstart;
3908     nnz  = Ii[i+1]- Ii[i];
3909     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3910   }
3911   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913 
3914   if (!v) {
3915     ierr = PetscFree(values);CHKERRQ(ierr);
3916   }
3917   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
3920 
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3923 /*@
3924    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3925    (the default parallel PETSc format).
3926 
3927    Collective on MPI_Comm
3928 
3929    Input Parameters:
3930 +  B - the matrix
3931 .  i - the indices into j for the start of each local row (starts with zero)
3932 .  j - the column indices for each local row (starts with zero)
3933 -  v - optional values in the matrix
3934 
3935    Level: developer
3936 
3937    Notes:
3938        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3939      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3940      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3941 
3942        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3943 
3944        The format used for the sparse matrix input is equivalent to a
3945     row-major ordering, i.e., for the following matrix, the input data expected is
3946     as shown:
3947 
3948         1 0 0
3949         2 0 3     P0
3950        -------
3951         4 5 6     P1
3952 
3953      Process0 [P0]: rows_owned=[0,1]
3954         i =  {0,1,3}  [size = nrow+1  = 2+1]
3955         j =  {0,0,2}  [size = nz = 3]
3956         v =  {1,2,3}  [size = nz = 3]
3957 
3958      Process1 [P1]: rows_owned=[2]
3959         i =  {0,3}    [size = nrow+1  = 1+1]
3960         j =  {0,1,2}  [size = nz = 3]
3961         v =  {4,5,6}  [size = nz = 3]
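
     A minimal calling sketch for P0 in the layout above (the variable names and the
     MatCreate()/MatSetSizes() calls are illustrative only, not taken from a PETSc example):
.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};         /* row offsets into j[] and v[] on P0 */
     PetscInt    j[] = {0,0,2};         /* global column indices on P0 */
     PetscScalar v[] = {1.0,2.0,3.0};   /* values on P0 */
     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,PETSC_DECIDE,3);   /* 2 local rows on P0, 3 global columns */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
     P1 makes the same calls with its own arrays and 1 local row.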
3962 
3963 .keywords: matrix, aij, compressed row, sparse, parallel
3964 
3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3966           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3967 @*/
3968 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3969 {
3970   PetscErrorCode ierr;
3971 
3972   PetscFunctionBegin;
3973   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3979 /*@C
3980    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3981    (the default parallel PETSc format).  For good matrix assembly performance
3982    the user should preallocate the matrix storage by setting the parameters
3983    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3984    performance can be increased by more than a factor of 50.
3985 
3986    Collective on MPI_Comm
3987 
3988    Input Parameters:
3989 +  A - the matrix
3990 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3991            (same value is used for all local rows)
3992 .  d_nnz - array containing the number of nonzeros in the various rows of the
3993            DIAGONAL portion of the local submatrix (possibly different for each row)
3994            or NULL, if d_nz is used to specify the nonzero structure.
3995            The size of this array is equal to the number of local rows, i.e 'm'.
3996            For matrices that will be factored, you must leave room for (and set)
3997            the diagonal entry even if it is zero.
3998 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3999            submatrix (same value is used for all local rows).
4000 -  o_nnz - array containing the number of nonzeros in the various rows of the
4001            OFF-DIAGONAL portion of the local submatrix (possibly different for
4002            each row) or NULL, if o_nz is used to specify the nonzero
4003            structure. The size of this array is equal to the number
4004            of local rows, i.e 'm'.
4005 
4006    If the *_nnz parameter is given then the *_nz parameter is ignored
4007 
4008    The AIJ format (also called the Yale sparse matrix format or
4009    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4010    storage.  The stored row and column indices begin with zero.
4011    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4012 
4013    The parallel matrix is partitioned such that the first m0 rows belong to
4014    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4015    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4016 
4017    The DIAGONAL portion of the local submatrix of a processor can be defined
4018    as the submatrix which is obtained by extracting the part corresponding to
4019    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4020    first row that belongs to the processor, r2 is the last row belonging to
4021    this processor, and c1-c2 is the range of indices of the local part of a
4022    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4023    common case of a square matrix, the row and column ranges are the same and
4024    the DIAGONAL part is also square. The remaining portion of the local
4025    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4026 
4027    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4028 
4029    You can call MatGetInfo() to get information on how effective the preallocation was;
4030    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4031    You can also run with the option -info and look for messages with the string
4032    malloc in them to see if additional memory allocation was needed.
4033 
4034    Example usage:
4035 
4036    Consider the following 8x8 matrix with 34 non-zero values, that is
4037    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4038    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4039    as follows:
4040 
4041 .vb
4042             1  2  0  |  0  3  0  |  0  4
4043     Proc0   0  5  6  |  7  0  0  |  8  0
4044             9  0 10  | 11  0  0  | 12  0
4045     -------------------------------------
4046            13  0 14  | 15 16 17  |  0  0
4047     Proc1   0 18  0  | 19 20 21  |  0  0
4048             0  0  0  | 22 23  0  | 24  0
4049     -------------------------------------
4050     Proc2  25 26 27  |  0  0 28  | 29  0
4051            30  0  0  | 31 32 33  |  0 34
4052 .ve
4053 
4054    This can be represented as a collection of submatrices as:
4055 
4056 .vb
4057       A B C
4058       D E F
4059       G H I
4060 .ve
4061 
4062    Where the submatrices A,B,C are owned by proc0, D,E,F are
4063    owned by proc1, G,H,I are owned by proc2.
4064 
4065    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4066    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4067    The 'M','N' parameters are 8,8, and have the same values on all procs.
4068 
4069    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4070    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4071    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4072    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4073    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4074    matrix, and [DF] as another SeqAIJ matrix.
4075 
4076    When d_nz, o_nz parameters are specified, d_nz storage elements are
4077    allocated for every row of the local diagonal submatrix, and o_nz
4078    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4079    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4080    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4081    In this case, the values of d_nz,o_nz are:
4082 .vb
4083      proc0 : dnz = 2, o_nz = 2
4084      proc1 : dnz = 3, o_nz = 2
4085      proc2 : dnz = 1, o_nz = 4
4086 .ve
4087    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4088    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4089    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4090    34 values.
4091 
4092    When d_nnz, o_nnz parameters are specified, the storage is specified
4093    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4094    In the above case the values for d_nnz,o_nnz are:
4095 .vb
4096      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4097      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4098      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4099 .ve
4100    Here the space allocated is the sum of all the above values, i.e., 34, and
4101    hence preallocation is perfect.
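
   For instance, proc0 above could preallocate with either form (a sketch only; the
   Mat A is assumed to already have its sizes and type set):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);   /* exact per-row preallocation */
     /* or, using only the per-row maxima d_nz = 2, o_nz = 2 */
     MatMPIAIJSetPreallocation(A,2,NULL,2,NULL);
.ve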
4102 
4103    Level: intermediate
4104 
4105 .keywords: matrix, aij, compressed row, sparse, parallel
4106 
4107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4108           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4109 @*/
4110 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4111 {
4112   PetscErrorCode ierr;
4113 
4114   PetscFunctionBegin;
4115   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4116   PetscValidType(B,1);
4117   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4123 /*@
4124      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain, in standard
4125          CSR format, the local rows.
4126 
4127    Collective on MPI_Comm
4128 
4129    Input Parameters:
4130 +  comm - MPI communicator
4131 .  m - number of local rows (Cannot be PETSC_DECIDE)
4132 .  n - This value should be the same as the local size used in creating the
4133        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4134        calculated if N is given). For square matrices n is almost always m.
4135 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4136 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4137 .   i - row indices
4138 .   j - column indices
4139 -   a - matrix values
4140 
4141    Output Parameter:
4142 .   mat - the matrix
4143 
4144    Level: intermediate
4145 
4146    Notes:
4147        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4148      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4149      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4150 
4151        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4152 
4153        The format used for the sparse matrix input is equivalent to a
4154     row-major ordering, i.e., for the following matrix, the input data expected is
4155     as shown:
4156 
4157         1 0 0
4158         2 0 3     P0
4159        -------
4160         4 5 6     P1
4161 
4162      Process0 [P0]: rows_owned=[0,1]
4163         i =  {0,1,3}  [size = nrow+1  = 2+1]
4164         j =  {0,0,2}  [size = nz = 3]
4165         v =  {1,2,3}  [size = nz = 3]
4166 
4167      Process1 [P1]: rows_owned=[2]
4168         i =  {0,3}    [size = nrow+1  = 1+1]
4169         j =  {0,1,2}  [size = nz = 3]
4170         v =  {4,5,6}  [size = nz = 3]
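
     A minimal calling sketch for P0 in the layout above (illustrative names only;
     comm is the communicator shared by P0 and P1):
.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
     P1 makes the same call with its own arrays and m = 1.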
4171 
4172 .keywords: matrix, aij, compressed row, sparse, parallel
4173 
4174 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4175           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4176 @*/
4177 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4178 {
4179   PetscErrorCode ierr;
4180 
4181   PetscFunctionBegin;
4182   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4183   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4184   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4185   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4186   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4187   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4188   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 #undef __FUNCT__
4193 #define __FUNCT__ "MatCreateAIJ"
4194 /*@C
4195    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4196    (the default parallel PETSc format).  For good matrix assembly performance
4197    the user should preallocate the matrix storage by setting the parameters
4198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4199    performance can be increased by more than a factor of 50.
4200 
4201    Collective on MPI_Comm
4202 
4203    Input Parameters:
4204 +  comm - MPI communicator
4205 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4206            This value should be the same as the local size used in creating the
4207            y vector for the matrix-vector product y = Ax.
4208 .  n - This value should be the same as the local size used in creating the
4209        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4210        calculated if N is given). For square matrices n is almost always m.
4211 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4212 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4213 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4214            (same value is used for all local rows)
4215 .  d_nnz - array containing the number of nonzeros in the various rows of the
4216            DIAGONAL portion of the local submatrix (possibly different for each row)
4217            or NULL, if d_nz is used to specify the nonzero structure.
4218            The size of this array is equal to the number of local rows, i.e 'm'.
4219 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4220            submatrix (same value is used for all local rows).
4221 -  o_nnz - array containing the number of nonzeros in the various rows of the
4222            OFF-DIAGONAL portion of the local submatrix (possibly different for
4223            each row) or NULL, if o_nz is used to specify the nonzero
4224            structure. The size of this array is equal to the number
4225            of local rows, i.e 'm'.
4226 
4227    Output Parameter:
4228 .  A - the matrix
4229 
4230    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4231    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4232    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
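
   A sketch of that paradigm (illustrative names; only the preallocation call that
   matches the run-time type of A has an effect, the other is ignored):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,d_nz,d_nnz);             /* used on one process */
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);  /* used on several processes */
.ve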
4233 
4234    Notes:
4235    If the *_nnz parameter is given then the *_nz parameter is ignored
4236 
4237    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4238    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4239    storage requirements for this matrix.
4240 
4241    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4242    processor then it must be used on all processors that share the object for
4243    that argument.
4244 
4245    The user MUST specify either the local or global matrix dimensions
4246    (possibly both).
4247 
4248    The parallel matrix is partitioned across processors such that the
4249    first m0 rows belong to process 0, the next m1 rows belong to
4250    process 1, the next m2 rows belong to process 2, etc., where
4251    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4252    values corresponding to an [m x N] submatrix.
4253 
4254    The columns are logically partitioned with the n0 columns belonging
4255    to the 0th partition, the next n1 columns belonging to the next
4256    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4257 
4258    The DIAGONAL portion of the local submatrix on any given processor
4259    is the submatrix corresponding to the rows and columns m,n
4260    corresponding to the given processor, i.e., the diagonal matrix on
4261    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4262    etc. The remaining portion of the local submatrix [m x (N-n)]
4263    constitutes the OFF-DIAGONAL portion. The example below better
4264    illustrates this concept.
4265 
4266    For a square global matrix we define each processor's diagonal portion
4267    to be its local rows and the corresponding columns (a square submatrix);
4268    each processor's off-diagonal portion encompasses the remainder of the
4269    local matrix (a rectangular submatrix).
4270 
4271    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4272 
4273    When calling this routine with a single process communicator, a matrix of
4274    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4275    type of communicator, use the construction mechanism:
4276      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4277 
4278    By default, this format uses inodes (identical nodes) when possible.
4279    We search for consecutive rows with the same nonzero structure, thereby
4280    reusing matrix information to achieve increased efficiency.
4281 
4282    Options Database Keys:
4283 +  -mat_no_inode  - Do not use inodes
4284 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4285 -  -mat_aij_oneindex - Internally use indexing starting at 1
4286         rather than 0.  Note that when calling MatSetValues(),
4287         the user still MUST index entries starting at 0!
4288 
4289 
4290    Example usage:
4291 
4292    Consider the following 8x8 matrix with 34 non-zero values, that is
4293    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4294    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4295    as follows:
4296 
4297 .vb
4298             1  2  0  |  0  3  0  |  0  4
4299     Proc0   0  5  6  |  7  0  0  |  8  0
4300             9  0 10  | 11  0  0  | 12  0
4301     -------------------------------------
4302            13  0 14  | 15 16 17  |  0  0
4303     Proc1   0 18  0  | 19 20 21  |  0  0
4304             0  0  0  | 22 23  0  | 24  0
4305     -------------------------------------
4306     Proc2  25 26 27  |  0  0 28  | 29  0
4307            30  0  0  | 31 32 33  |  0 34
4308 .ve
4309 
4310    This can be represented as a collection of submatrices as:
4311 
4312 .vb
4313       A B C
4314       D E F
4315       G H I
4316 .ve
4317 
4318    Where the submatrices A,B,C are owned by proc0, D,E,F are
4319    owned by proc1, G,H,I are owned by proc2.
4320 
4321    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4322    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4323    The 'M','N' parameters are 8,8, and have the same values on all procs.
4324 
4325    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4326    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4327    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4328    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4329    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4330    matrix, and [DF] as another SeqAIJ matrix.
4331 
4332    When d_nz, o_nz parameters are specified, d_nz storage elements are
4333    allocated for every row of the local diagonal submatrix, and o_nz
4334    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4335    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4336    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4337    In this case, the values of d_nz,o_nz are:
4338 .vb
4339      proc0 : dnz = 2, o_nz = 2
4340      proc1 : dnz = 3, o_nz = 2
4341      proc2 : dnz = 1, o_nz = 4
4342 .ve
4343    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4344    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4345    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4346    34 values.
4347 
4348    When d_nnz, o_nnz parameters are specified, the storage is specified
4349    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4350    In the above case the values for d_nnz,o_nnz are:
4351 .vb
4352      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4353      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4354      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4355 .ve
4356    Here the space allocated is the sum of all the above values, i.e., 34, and
4357    hence preallocation is perfect.
4358 
4359    Level: intermediate
4360 
4361 .keywords: matrix, aij, compressed row, sparse, parallel
4362 
4363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4364           MPIAIJ, MatCreateMPIAIJWithArrays()
4365 @*/
4366 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4367 {
4368   PetscErrorCode ierr;
4369   PetscMPIInt    size;
4370 
4371   PetscFunctionBegin;
4372   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4373   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4374   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4375   if (size > 1) {
4376     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4377     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4378   } else {
4379     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4380     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4381   }
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 #undef __FUNCT__
4386 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4387 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4388 {
4389   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4390 
4391   PetscFunctionBegin;
4392   *Ad     = a->A;
4393   *Ao     = a->B;
4394   *colmap = a->garray;
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4400 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4401 {
4402   PetscErrorCode ierr;
4403   PetscInt       i;
4404   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4405 
4406   PetscFunctionBegin;
4407   if (coloring->ctype == IS_COLORING_GLOBAL) {
4408     ISColoringValue *allcolors,*colors;
4409     ISColoring      ocoloring;
4410 
4411     /* set coloring for diagonal portion */
4412     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4413 
4414     /* set coloring for off-diagonal portion */
4415     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4416     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4417     for (i=0; i<a->B->cmap->n; i++) {
4418       colors[i] = allcolors[a->garray[i]];
4419     }
4420     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4421     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4422     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4423     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4424   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4425     ISColoringValue *colors;
4426     PetscInt        *larray;
4427     ISColoring      ocoloring;
4428 
4429     /* set coloring for diagonal portion */
4430     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4431     for (i=0; i<a->A->cmap->n; i++) {
4432       larray[i] = i + A->cmap->rstart;
4433     }
4434     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4435     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4436     for (i=0; i<a->A->cmap->n; i++) {
4437       colors[i] = coloring->colors[larray[i]];
4438     }
4439     ierr = PetscFree(larray);CHKERRQ(ierr);
4440     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4441     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4442     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4443 
4444     /* set coloring for off-diagonal portion */
4445     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4446     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4447     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4448     for (i=0; i<a->B->cmap->n; i++) {
4449       colors[i] = coloring->colors[larray[i]];
4450     }
4451     ierr = PetscFree(larray);CHKERRQ(ierr);
4452     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4453     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4454     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4455   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 #undef __FUNCT__
4460 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4461 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4462 {
4463   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4464   PetscErrorCode ierr;
4465 
4466   PetscFunctionBegin;
4467   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4468   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4474 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4475 {
4476   PetscErrorCode ierr;
4477   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4478   PetscInt       *indx;
4479 
4480   PetscFunctionBegin;
4481   /* This routine will ONLY return MPIAIJ type matrix */
4482   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4483   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4484   if (n == PETSC_DECIDE) {
4485     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4486   }
4487   /* Check sum(n) = N */
4488   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4489   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4490 
4491   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4492   rstart -= m;
4493 
4494   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4495   for (i=0; i<m; i++) {
4496     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4497     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4498     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4499   }
4500 
4501   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4502   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4503   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4504   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4505   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4506   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #undef __FUNCT__
4511 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4512 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4513 {
4514   PetscErrorCode ierr;
4515   PetscInt       m,N,i,rstart,nnz,Ii;
4516   PetscInt       *indx;
4517   PetscScalar    *values;
4518 
4519   PetscFunctionBegin;
4520   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4521   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4522   for (i=0; i<m; i++) {
4523     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4524     Ii   = i + rstart;
4525     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4526     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4527   }
4528   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4529   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4530   PetscFunctionReturn(0);
4531 }
4532 
4533 #undef __FUNCT__
4534 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4535 /*@
4536       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4537                  matrices from each processor
4538 
4539     Collective on MPI_Comm
4540 
4541    Input Parameters:
4542 +    comm - the communicator the parallel matrix will live on
4543 .    inmat - the input sequential matrices
4544 .    n - number of local columns (or PETSC_DECIDE)
4545 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4546 
4547    Output Parameter:
4548 .    outmat - the parallel matrix generated
4549 
4550     Level: advanced
4551 
4552    Notes: The number of columns of the matrix in EACH processor MUST be the same.
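
   A minimal usage sketch (illustrative names; each process supplies its own seqA,
   all with the same number of columns):
.vb
     Mat seqA,A;
     /* ... assemble seqA as a SeqAIJ matrix on each process ... */
     MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
.ve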
4553 
4554 @*/
4555 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4556 {
4557   PetscErrorCode ierr;
4558 
4559   PetscFunctionBegin;
4560   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4561   if (scall == MAT_INITIAL_MATRIX) {
4562     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4563   }
4564   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4565   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #undef __FUNCT__
4570 #define __FUNCT__ "MatFileSplit"
4571 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4572 {
4573   PetscErrorCode    ierr;
4574   PetscMPIInt       rank;
4575   PetscInt          m,N,i,rstart,nnz;
4576   size_t            len;
4577   const PetscInt    *indx;
4578   PetscViewer       out;
4579   char              *name;
4580   Mat               B;
4581   const PetscScalar *values;
4582 
4583   PetscFunctionBegin;
4584   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4585   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4586   /* Should this be the type of the diagonal block of A? */
4587   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4588   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4589   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4590   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4591   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4592   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4593   for (i=0; i<m; i++) {
4594     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4595     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4596     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4597   }
4598   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4600 
4601   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4602   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4603   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4604   sprintf(name,"%s.%d",outfile,rank);
4605   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4606   ierr = PetscFree(name);CHKERRQ(ierr);
4607   ierr = MatView(B,out);CHKERRQ(ierr);
4608   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4609   ierr = MatDestroy(&B);CHKERRQ(ierr);
4610   PetscFunctionReturn(0);
4611 }
4612 
4613 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4614 #undef __FUNCT__
4615 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4616 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4617 {
4618   PetscErrorCode      ierr;
4619   Mat_Merge_SeqsToMPI *merge;
4620   PetscContainer      container;
4621 
4622   PetscFunctionBegin;
4623   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4624   if (container) {
4625     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4626     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4627     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4628     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4629     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4630     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4631     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4632     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4633     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4634     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4635     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4636     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4637     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4638     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4639     ierr = PetscFree(merge);CHKERRQ(ierr);
4640     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4641   }
4642   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4643   PetscFunctionReturn(0);
4644 }
4645 
4646 #include <../src/mat/utils/freespace.h>
4647 #include <petscbt.h>
4648 
4649 #undef __FUNCT__
4650 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4651 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4652 {
4653   PetscErrorCode      ierr;
4654   MPI_Comm            comm;
4655   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4656   PetscMPIInt         size,rank,taga,*len_s;
4657   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4658   PetscInt            proc,m;
4659   PetscInt            **buf_ri,**buf_rj;
4660   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4661   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4662   MPI_Request         *s_waits,*r_waits;
4663   MPI_Status          *status;
4664   MatScalar           *aa=a->a;
4665   MatScalar           **abuf_r,*ba_i;
4666   Mat_Merge_SeqsToMPI *merge;
4667   PetscContainer      container;
4668 
4669   PetscFunctionBegin;
4670   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4671   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4672 
4673   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4674   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4675 
4676   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4677   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4678 
4679   bi     = merge->bi;
4680   bj     = merge->bj;
4681   buf_ri = merge->buf_ri;
4682   buf_rj = merge->buf_rj;
4683 
4684   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4685   owners = merge->rowmap->range;
4686   len_s  = merge->len_s;
4687 
4688   /* send and recv matrix values */
4689   /*-----------------------------*/
4690   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4691   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4692 
4693   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4694   for (proc=0,k=0; proc<size; proc++) {
4695     if (!len_s[proc]) continue;
4696     i    = owners[proc];
4697     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4698     k++;
4699   }
4700 
4701   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4702   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4703   ierr = PetscFree(status);CHKERRQ(ierr);
4704 
4705   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4706   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4707 
4708   /* insert mat values of mpimat */
4709   /*----------------------------*/
4710   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4711   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4712 
4713   for (k=0; k<merge->nrecv; k++) {
4714     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4715     nrows       = *(buf_ri_k[k]);
4716     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4717     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4718   }
4719 
4720   /* set values of ba */
4721   m = merge->rowmap->n;
4722   for (i=0; i<m; i++) {
4723     arow = owners[rank] + i;
4724     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4725     bnzi = bi[i+1] - bi[i];
4726     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4727 
4728     /* add local non-zero vals of this proc's seqmat into ba */
4729     anzi   = ai[arow+1] - ai[arow];
4730     aj     = a->j + ai[arow];
4731     aa     = a->a + ai[arow];
4732     nextaj = 0;
4733     for (j=0; nextaj<anzi; j++) {
4734       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4735         ba_i[j] += aa[nextaj++];
4736       }
4737     }
4738 
4739     /* add received vals into ba */
4740     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4741       /* i-th row */
4742       if (i == *nextrow[k]) {
4743         anzi   = *(nextai[k]+1) - *nextai[k];
4744         aj     = buf_rj[k] + *(nextai[k]);
4745         aa     = abuf_r[k] + *(nextai[k]);
4746         nextaj = 0;
4747         for (j=0; nextaj<anzi; j++) {
4748           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4749             ba_i[j] += aa[nextaj++];
4750           }
4751         }
4752         nextrow[k]++; nextai[k]++;
4753       }
4754     }
4755     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4756   }
4757   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4758   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4759 
4760   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4761   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4762   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4763   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4764   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4765   PetscFunctionReturn(0);
4766 }
4767 
4768 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4769 
4770 #undef __FUNCT__
4771 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4772 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4773 {
4774   PetscErrorCode      ierr;
4775   Mat                 B_mpi;
4776   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4777   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4778   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4779   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4780   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4781   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4782   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4783   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4784   MPI_Status          *status;
4785   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4786   PetscBT             lnkbt;
4787   Mat_Merge_SeqsToMPI *merge;
4788   PetscContainer      container;
4789 
4790   PetscFunctionBegin;
4791   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4792 
4793   /* make sure it is a PETSc comm */
4794   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4795   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4796   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4797 
4798   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4799   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4800 
4801   /* determine row ownership */
4802   /*---------------------------------------------------------*/
4803   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4804   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4805   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4806   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4807   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4808   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4809   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4810 
4811   m      = merge->rowmap->n;
4812   owners = merge->rowmap->range;
4813 
4814   /* determine the number of messages to send, their lengths */
4815   /*---------------------------------------------------------*/
4816   len_s = merge->len_s;
4817 
4818   len          = 0; /* length of buf_si[] */
4819   merge->nsend = 0;
4820   for (proc=0; proc<size; proc++) {
4821     len_si[proc] = 0;
4822     if (proc == rank) {
4823       len_s[proc] = 0;
4824     } else {
4825       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4826       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4827     }
4828     if (len_s[proc]) {
4829       merge->nsend++;
4830       nrows = 0;
4831       for (i=owners[proc]; i<owners[proc+1]; i++) {
4832         if (ai[i+1] > ai[i]) nrows++;
4833       }
4834       len_si[proc] = 2*(nrows+1);
4835       len         += len_si[proc];
4836     }
4837   }
4838 
4839   /* determine the number and length of messages to receive for ij-structure */
4840   /*-------------------------------------------------------------------------*/
4841   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4842   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4843 
4844   /* post the Irecv of j-structure */
4845   /*-------------------------------*/
4846   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4847   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4848 
4849   /* post the Isend of j-structure */
4850   /*--------------------------------*/
4851   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4852 
4853   for (proc=0, k=0; proc<size; proc++) {
4854     if (!len_s[proc]) continue;
4855     i    = owners[proc];
4856     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4857     k++;
4858   }
4859 
4860   /* receives and sends of j-structure are complete */
4861   /*------------------------------------------------*/
4862   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4863   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4864 
4865   /* send and recv i-structure */
4866   /*---------------------------*/
4867   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4868   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4869 
4870   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4871   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4872   for (proc=0,k=0; proc<size; proc++) {
4873     if (!len_s[proc]) continue;
4874     /* form outgoing message for i-structure:
4875          buf_si[0]:                 nrows to be sent
4876                [1:nrows]:           row index (global)
4877                [nrows+1:2*nrows+1]: i-structure index
4878     */
4879     /*-------------------------------------------*/
4880     nrows       = len_si[proc]/2 - 1;
4881     buf_si_i    = buf_si + nrows+1;
4882     buf_si[0]   = nrows;
4883     buf_si_i[0] = 0;
4884     nrows       = 0;
4885     for (i=owners[proc]; i<owners[proc+1]; i++) {
4886       anzi = ai[i+1] - ai[i];
4887       if (anzi) {
4888         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4889         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4890         nrows++;
4891       }
4892     }
4893     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4894     k++;
4895     buf_si += len_si[proc];
4896   }
4897 
4898   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4899   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4900 
4901   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4902   for (i=0; i<merge->nrecv; i++) {
4903     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4904   }
4905 
4906   ierr = PetscFree(len_si);CHKERRQ(ierr);
4907   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4908   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4909   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4910   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4911   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4912   ierr = PetscFree(status);CHKERRQ(ierr);
4913 
4914   /* compute a local seq matrix in each processor */
4915   /*----------------------------------------------*/
4916   /* allocate bi array and free space for accumulating nonzero column info */
4917   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4918   bi[0] = 0;
4919 
4920   /* create and initialize a linked list */
4921   nlnk = N+1;
4922   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4923 
4924   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4925   len  = ai[owners[rank+1]] - ai[owners[rank]];
4926   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4927 
4928   current_space = free_space;
4929 
4930   /* determine symbolic info for each local row */
4931   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4932 
4933   for (k=0; k<merge->nrecv; k++) {
4934     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4935     nrows       = *buf_ri_k[k];
4936     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4937     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4938   }
4939 
4940   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4941   len  = 0;
4942   for (i=0; i<m; i++) {
4943     bnzi = 0;
4944     /* add local non-zero cols of this proc's seqmat into lnk */
4945     arow  = owners[rank] + i;
4946     anzi  = ai[arow+1] - ai[arow];
4947     aj    = a->j + ai[arow];
4948     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4949     bnzi += nlnk;
4950     /* add received col data into lnk */
4951     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4952       if (i == *nextrow[k]) { /* i-th row */
4953         anzi  = *(nextai[k]+1) - *nextai[k];
4954         aj    = buf_rj[k] + *nextai[k];
4955         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4956         bnzi += nlnk;
4957         nextrow[k]++; nextai[k]++;
4958       }
4959     }
4960     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4961 
4962     /* if free space is not available, make more free space */
4963     if (current_space->local_remaining<bnzi) {
4964       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4965       nspacedouble++;
4966     }
4967     /* copy data into free space, then initialize lnk */
4968     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4969     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4970 
4971     current_space->array           += bnzi;
4972     current_space->local_used      += bnzi;
4973     current_space->local_remaining -= bnzi;
4974 
4975     bi[i+1] = bi[i] + bnzi;
4976   }
4977 
4978   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4979 
4980   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4981   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4982   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4983 
4984   /* create symbolic parallel matrix B_mpi */
4985   /*---------------------------------------*/
4986   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4987   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4988   if (n==PETSC_DECIDE) {
4989     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4990   } else {
4991     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4992   }
4993   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4994   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4995   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4996   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4997   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4998 
4999   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5000   B_mpi->assembled    = PETSC_FALSE;
5001   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5002   merge->bi           = bi;
5003   merge->bj           = bj;
5004   merge->buf_ri       = buf_ri;
5005   merge->buf_rj       = buf_rj;
5006   merge->coi          = NULL;
5007   merge->coj          = NULL;
5008   merge->owners_co    = NULL;
5009 
5010   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5011 
5012   /* attach the supporting struct to B_mpi for reuse */
5013   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5014   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5015   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5016   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5017   *mpimat = B_mpi;
5018 
5019   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5025 /*@C
5026       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5027                  matrices from each processor
5028 
5029     Collective on MPI_Comm
5030 
5031    Input Parameters:
5032 +    comm - the communicator the parallel matrix will live on
5033 .    seqmat - the input sequential matrices
5034 .    m - number of local rows (or PETSC_DECIDE)
5035 .    n - number of local columns (or PETSC_DECIDE)
5036 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5037 
5038    Output Parameter:
5039 .    mpimat - the parallel matrix generated
5040 
5041     Level: advanced
5042 
5043    Notes:
5044      The dimensions of the sequential matrix in each processor MUST be the same.
5045      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5046      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
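
   A minimal usage sketch (illustrative names; every process contributes a seqmat of
   the same global dimensions, whose entries are summed):
.vb
     Mat seqA,A;
     /* ... each process assembles its full-size SeqAIJ contribution seqA ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
.ve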
5047 @*/
5048 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5049 {
5050   PetscErrorCode ierr;
5051   PetscMPIInt    size;
5052 
5053   PetscFunctionBegin;
5054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5055   if (size == 1) {
5056     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5057     if (scall == MAT_INITIAL_MATRIX) {
5058       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5059     } else {
5060       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5061     }
5062     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5063     PetscFunctionReturn(0);
5064   }
5065   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5066   if (scall == MAT_INITIAL_MATRIX) {
5067     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5068   }
5069   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5070   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
5073 
5074 #undef __FUNCT__
5075 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5076 /*@
5077      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5078           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5079           with MatGetSize()
5080 
5081     Not Collective
5082 
5083    Input Parameters:
5084 +    A - the matrix
5085 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5086 
5087    Output Parameter:
5088 .    A_loc - the local sequential matrix generated
5089 
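    A typical usage sketch (illustrative only):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh the values after A changes */
     MatDestroy(&A_loc);
.ve
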
5090     Level: developer
5091 
5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5093 
5094 @*/
5095 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5096 {
5097   PetscErrorCode ierr;
5098   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5099   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5100   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5101   MatScalar      *aa=a->a,*ba=b->a,*cam;
5102   PetscScalar    *ca;
5103   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5104   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5105   PetscBool      match;
5106 
5107   PetscFunctionBegin;
5108   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5109   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5110   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5111   if (scall == MAT_INITIAL_MATRIX) {
5112     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5113     ci[0] = 0;
5114     for (i=0; i<am; i++) {
5115       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5116     }
5117     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5118     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5119     k    = 0;
5120     for (i=0; i<am; i++) {
5121       ncols_o = bi[i+1] - bi[i];
5122       ncols_d = ai[i+1] - ai[i];
5123       /* off-diagonal portion of A */
5124       for (jo=0; jo<ncols_o; jo++) {
5125         col = cmap[*bj];
5126         if (col >= cstart) break;
5127         cj[k]   = col; bj++;
5128         ca[k++] = *ba++;
5129       }
5130       /* diagonal portion of A */
5131       for (j=0; j<ncols_d; j++) {
5132         cj[k]   = cstart + *aj++;
5133         ca[k++] = *aa++;
5134       }
5135       /* off-diagonal portion of A */
5136       for (j=jo; j<ncols_o; j++) {
5137         cj[k]   = cmap[*bj++];
5138         ca[k++] = *ba++;
5139       }
5140     }
5141     /* put together the new matrix */
5142     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5143     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5144     /* Since these are PETSc arrays, change flags to free them as necessary. */
5145     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5146     mat->free_a  = PETSC_TRUE;
5147     mat->free_ij = PETSC_TRUE;
5148     mat->nonew   = 0;
5149   } else if (scall == MAT_REUSE_MATRIX) {
5150     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5151     ci = mat->i; cj = mat->j; cam = mat->a;
5152     for (i=0; i<am; i++) {
5153       /* off-diagonal portion of A */
5154       ncols_o = bi[i+1] - bi[i];
5155       for (jo=0; jo<ncols_o; jo++) {
5156         col = cmap[*bj];
5157         if (col >= cstart) break;
5158         *cam++ = *ba++; bj++;
5159       }
5160       /* diagonal portion of A */
5161       ncols_d = ai[i+1] - ai[i];
5162       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5163       /* off-diagonal portion of A */
5164       for (j=jo; j<ncols_o; j++) {
5165         *cam++ = *ba++; bj++;
5166       }
5167     }
5168   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5169   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5170   PetscFunctionReturn(0);
5171 }
5172 
5173 #undef __FUNCT__
5174 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5175 /*@C
5176      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5177 
5178     Not Collective
5179 
5180    Input Parameters:
5181 +    A - the matrix
5182 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5183 -    row, col - index sets of rows and columns to extract (or NULL)
5184 
5185    Output Parameter:
5186 .    A_loc - the local sequential matrix generated
5187 
5188     Level: developer
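
   Example usage (a sketch; passing NULL for row and col lets the routine use all local rows and the nonzero columns it detects):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve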
5189 
5190 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5191 
5192 @*/
5193 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5194 {
5195   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5196   PetscErrorCode ierr;
5197   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5198   IS             isrowa,iscola;
5199   Mat            *aloc;
5200   PetscBool      match;
5201 
5202   PetscFunctionBegin;
5203   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5204   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5205   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5206   if (!row) {
5207     start = A->rmap->rstart; end = A->rmap->rend;
5208     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5209   } else {
5210     isrowa = *row;
5211   }
5212   if (!col) {
5213     start = A->cmap->rstart;
5214     cmap  = a->garray;
5215     nzA   = a->A->cmap->n;
5216     nzB   = a->B->cmap->n;
5217     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5218     ncols = 0;
5219     for (i=0; i<nzB; i++) {
5220       if (cmap[i] < start) idx[ncols++] = cmap[i];
5221       else break;
5222     }
5223     imark = i;
5224     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5225     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5226     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5227   } else {
5228     iscola = *col;
5229   }
5230   if (scall != MAT_INITIAL_MATRIX) {
5231     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5232     aloc[0] = *A_loc;
5233   }
5234   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5235   *A_loc = aloc[0];
5236   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5237   if (!row) {
5238     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5239   }
5240   if (!col) {
5241     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5242   }
5243   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5244   PetscFunctionReturn(0);
5245 }
5246 
5247 #undef __FUNCT__
5248 #define __FUNCT__ "MatGetBrowsOfAcols"
5249 /*@C
5250     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A
5251 
5252     Collective on Mat
5253 
5254    Input Parameters:
5255 +    A,B - the matrices in mpiaij format
5256 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5257 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5258 
5259    Output Parameter:
5260 +    rowb, colb - index sets of rows and columns of B to extract
5261 -    B_seq - the sequential matrix generated
5262 
5263     Level: developer
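
   Example usage (a sketch; the index sets obtained from the first call are passed back for MAT_REUSE_MATRIX after
   the values of B have changed while its nonzero structure stayed the same):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve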
5264 
5265 @*/
5266 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5267 {
5268   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5269   PetscErrorCode ierr;
5270   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5271   IS             isrowb,iscolb;
5272   Mat            *bseq=NULL;
5273 
5274   PetscFunctionBegin;
5275   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5276     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5277   }
5278   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5279 
5280   if (scall == MAT_INITIAL_MATRIX) {
5281     start = A->cmap->rstart;
5282     cmap  = a->garray;
5283     nzA   = a->A->cmap->n;
5284     nzB   = a->B->cmap->n;
5285     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5286     ncols = 0;
5287     for (i=0; i<nzB; i++) {  /* row < local row index */
5288       if (cmap[i] < start) idx[ncols++] = cmap[i];
5289       else break;
5290     }
5291     imark = i;
5292     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5293     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5294     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5295     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5296   } else {
5297     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5298     isrowb  = *rowb; iscolb = *colb;
5299     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5300     bseq[0] = *B_seq;
5301   }
5302   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5303   *B_seq = bseq[0];
5304   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5305   if (!rowb) {
5306     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5307   } else {
5308     *rowb = isrowb;
5309   }
5310   if (!colb) {
5311     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5312   } else {
5313     *colb = iscolb;
5314   }
5315   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5316   PetscFunctionReturn(0);
5317 }
5318 
5319 #undef __FUNCT__
5320 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5321 /*
5322     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns
5323     of the OFF-DIAGONAL portion of local A
5324 
5325     Collective on Mat
5326 
5327    Input Parameters:
5328 +    A,B - the matrices in mpiaij format
5329 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5330 
5331    Output Parameter:
5332 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5333 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5334 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5335 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5336 
5337     Level: developer
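
   Example usage (a sketch; the arrays returned by the MAT_INITIAL_MATRIX call are passed back unchanged for
   MAT_REUSE_MATRIX after the numerical values of B change):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);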
5338 
5339 */
5340 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5341 {
5342   VecScatter_MPI_General *gen_to,*gen_from;
5343   PetscErrorCode         ierr;
5344   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5345   Mat_SeqAIJ             *b_oth;
5346   VecScatter             ctx =a->Mvctx;
5347   MPI_Comm               comm;
5348   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5349   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5350   PetscScalar            *rvalues,*svalues;
5351   MatScalar              *b_otha,*bufa,*bufA;
5352   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5353   MPI_Request            *rwaits = NULL,*swaits = NULL;
5354   MPI_Status             *sstatus,rstatus;
5355   PetscMPIInt            jj;
5356   PetscInt               *cols,sbs,rbs;
5357   PetscScalar            *vals;
5358 
5359   PetscFunctionBegin;
5360   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5361   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5362     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5363   }
5364   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5365   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5366 
5367   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5368   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5369   rvalues  = gen_from->values; /* holds the length of receiving row */
5370   svalues  = gen_to->values;   /* holds the length of sending row */
5371   nrecvs   = gen_from->n;
5372   nsends   = gen_to->n;
5373 
5374   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5375   srow    = gen_to->indices;    /* local row index to be sent */
5376   sstarts = gen_to->starts;
5377   sprocs  = gen_to->procs;
5378   sstatus = gen_to->sstatus;
5379   sbs     = gen_to->bs;
5380   rstarts = gen_from->starts;
5381   rprocs  = gen_from->procs;
5382   rbs     = gen_from->bs;
5383 
5384   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5385   if (scall == MAT_INITIAL_MATRIX) {
5386     /* i-array */
5387     /*---------*/
5388     /*  post receives */
5389     for (i=0; i<nrecvs; i++) {
5390       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5391       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5392       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* pack the outgoing message */
5396     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5397 
5398     sstartsj[0] = 0;
5399     rstartsj[0] = 0;
5400     len         = 0; /* total length of j or a array to be sent */
5401     k           = 0;
5402     for (i=0; i<nsends; i++) {
5403       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5404       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5405       for (j=0; j<nrows; j++) {
5406         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5407         for (l=0; l<sbs; l++) {
5408           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5409 
5410           rowlen[j*sbs+l] = ncols;
5411 
5412           len += ncols;
5413           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5414         }
5415         k++;
5416       }
5417       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5418 
5419       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5420     }
5421     /* recvs and sends of i-array are completed */
5422     i = nrecvs;
5423     while (i--) {
5424       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5425     }
5426     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5427 
5428     /* allocate buffers for sending j and a arrays */
5429     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5430     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5431 
5432     /* create i-array of B_oth */
5433     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5434 
5435     b_othi[0] = 0;
5436     len       = 0; /* total length of j or a array to be received */
5437     k         = 0;
5438     for (i=0; i<nrecvs; i++) {
5439       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5440       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5441       for (j=0; j<nrows; j++) {
5442         b_othi[k+1] = b_othi[k] + rowlen[j];
5443         len        += rowlen[j]; k++;
5444       }
5445       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5446     }
5447 
5448     /* allocate space for j and a arrays of B_oth */
5449     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5450     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5451 
5452     /* j-array */
5453     /*---------*/
5454     /*  post receives of j-array */
5455     for (i=0; i<nrecvs; i++) {
5456       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5457       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5458     }
5459 
5460     /* pack the outgoing message j-array */
5461     k = 0;
5462     for (i=0; i<nsends; i++) {
5463       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5464       bufJ  = bufj+sstartsj[i];
5465       for (j=0; j<nrows; j++) {
5466         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5467         for (ll=0; ll<sbs; ll++) {
5468           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5469           for (l=0; l<ncols; l++) {
5470             *bufJ++ = cols[l];
5471           }
5472           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5473         }
5474       }
5475       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5476     }
5477 
5478     /* recvs and sends of j-array are completed */
5479     i = nrecvs;
5480     while (i--) {
5481       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5482     }
5483     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5484   } else if (scall == MAT_REUSE_MATRIX) {
5485     sstartsj = *startsj_s;
5486     rstartsj = *startsj_r;
5487     bufa     = *bufa_ptr;
5488     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5489     b_otha   = b_oth->a;
5490   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5491 
5492   /* a-array */
5493   /*---------*/
5494   /*  post receives of a-array */
5495   for (i=0; i<nrecvs; i++) {
5496     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5497     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5498   }
5499 
5500   /* pack the outgoing message a-array */
5501   k = 0;
5502   for (i=0; i<nsends; i++) {
5503     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5504     bufA  = bufa+sstartsj[i];
5505     for (j=0; j<nrows; j++) {
5506       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5507       for (ll=0; ll<sbs; ll++) {
5508         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5509         for (l=0; l<ncols; l++) {
5510           *bufA++ = vals[l];
5511         }
5512         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5513       }
5514     }
5515     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5516   }
5517   /* recvs and sends of a-array are completed */
5518   i = nrecvs;
5519   while (i--) {
5520     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5521   }
5522   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5523   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5524 
5525   if (scall == MAT_INITIAL_MATRIX) {
5526     /* put together the new matrix */
5527     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5528 
5529     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5530     /* Since these are PETSc arrays, change flags to free them as necessary. */
5531     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5532     b_oth->free_a  = PETSC_TRUE;
5533     b_oth->free_ij = PETSC_TRUE;
5534     b_oth->nonew   = 0;
5535 
5536     ierr = PetscFree(bufj);CHKERRQ(ierr);
5537     if (!startsj_s || !bufa_ptr) {
5538       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5539       ierr = PetscFree(bufa);CHKERRQ(ierr);
5540     } else {
5541       *startsj_s = sstartsj;
5542       *startsj_r = rstartsj;
5543       *bufa_ptr  = bufa;
5544     }
5545   }
5546   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5547   PetscFunctionReturn(0);
5548 }
5549 
5550 #undef __FUNCT__
5551 #define __FUNCT__ "MatGetCommunicationStructs"
5552 /*@C
5553   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5554 
5555   Not Collective
5556 
5557   Input Parameters:
5558 . A - The matrix in mpiaij format
5559 
5560   Output Parameter:
5561 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5562 . colmap - A map from global column index to local index into lvec
5563 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5564 
5565   Level: developer
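
  Example usage (a sketch; the returned objects are internal to the matrix and must not be destroyed by the caller,
  and the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve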
5566 
5567 @*/
5568 #if defined(PETSC_USE_CTABLE)
5569 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5570 #else
5571 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5572 #endif
5573 {
5574   Mat_MPIAIJ *a;
5575 
5576   PetscFunctionBegin;
5577   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5578   PetscValidPointer(lvec, 2);
5579   PetscValidPointer(colmap, 3);
5580   PetscValidPointer(multScatter, 4);
5581   a = (Mat_MPIAIJ*) A->data;
5582   if (lvec) *lvec = a->lvec;
5583   if (colmap) *colmap = a->colmap;
5584   if (multScatter) *multScatter = a->Mvctx;
5585   PetscFunctionReturn(0);
5586 }
5587 
5588 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5589 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5590 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5591 
5592 #undef __FUNCT__
5593 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5594 /*
5595     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5596 
5597                n                       p                          p
5598         (              )       (              )         (                  )
5599       m (      A       )  *  n (       B      )   =   m (         C        )
5600         (              )       (              )         (                  )
5601 
5602 */
5603 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5604 {
5605   PetscErrorCode ierr;
5606   Mat            At,Bt,Ct;
5607 
5608   PetscFunctionBegin;
5609   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5610   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5611   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5612   ierr = MatDestroy(&At);CHKERRQ(ierr);
5613   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5614   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5615   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5616   PetscFunctionReturn(0);
5617 }
5618 
5619 #undef __FUNCT__
5620 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5621 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5622 {
5623   PetscErrorCode ierr;
5624   PetscInt       m=A->rmap->n,n=B->cmap->n;
5625   Mat            Cmat;
5626 
5627   PetscFunctionBegin;
5628   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5629   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5630   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5631   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5632   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5633   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5634   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5636 
5637   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5638 
5639   *C = Cmat;
5640   PetscFunctionReturn(0);
5641 }
5642 
5643 /* ----------------------------------------------------------------*/
5644 #undef __FUNCT__
5645 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5646 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5647 {
5648   PetscErrorCode ierr;
5649 
5650   PetscFunctionBegin;
5651   if (scall == MAT_INITIAL_MATRIX) {
5652     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5653     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5654     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5655   }
5656   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5657   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5658   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5659   PetscFunctionReturn(0);
5660 }
5661 
5662 #if defined(PETSC_HAVE_MUMPS)
5663 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5664 #endif
5665 #if defined(PETSC_HAVE_PASTIX)
5666 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5667 #endif
5668 #if defined(PETSC_HAVE_SUPERLU_DIST)
5669 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5670 #endif
5671 #if defined(PETSC_HAVE_CLIQUE)
5672 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5673 #endif
5674 
5675 /*MC
5676    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5677 
5678    Options Database Keys:
5679 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5680 
5681   Level: beginner
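
   Example usage (a sketch; d_nz and o_nz are placeholder preallocation estimates for the diagonal and off-diagonal blocks):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve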
5682 
5683 .seealso: MatCreateAIJ()
5684 M*/
5685 
5686 #undef __FUNCT__
5687 #define __FUNCT__ "MatCreate_MPIAIJ"
5688 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5689 {
5690   Mat_MPIAIJ     *b;
5691   PetscErrorCode ierr;
5692   PetscMPIInt    size;
5693 
5694   PetscFunctionBegin;
5695   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5696 
5697   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5698   B->data       = (void*)b;
5699   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5700   B->assembled  = PETSC_FALSE;
5701   B->insertmode = NOT_SET_VALUES;
5702   b->size       = size;
5703 
5704   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5705 
5706   /* build cache for off array entries formed */
5707   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5708 
5709   b->donotstash  = PETSC_FALSE;
5710   b->colmap      = 0;
5711   b->garray      = 0;
5712   b->roworiented = PETSC_TRUE;
5713 
5714   /* stuff used for matrix vector multiply */
5715   b->lvec  = NULL;
5716   b->Mvctx = NULL;
5717 
5718   /* stuff for MatGetRow() */
5719   b->rowindices   = 0;
5720   b->rowvalues    = 0;
5721   b->getrowactive = PETSC_FALSE;
5722 
5723   /* flexible pointer used in CUSP/CUSPARSE classes */
5724   b->spptr = NULL;
5725 
5726 #if defined(PETSC_HAVE_MUMPS)
5727   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5728 #endif
5729 #if defined(PETSC_HAVE_PASTIX)
5730   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5731 #endif
5732 #if defined(PETSC_HAVE_SUPERLU_DIST)
5733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5734 #endif
5735 #if defined(PETSC_HAVE_CLIQUE)
5736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5737 #endif
5738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5744   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5748   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5749   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5750   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5751   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5752   PetscFunctionReturn(0);
5753 }
5754 
5755 #undef __FUNCT__
5756 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5757 /*@
5758      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5759          and "off-diagonal" part of the matrix in CSR format.
5760 
5761    Collective on MPI_Comm
5762 
5763    Input Parameters:
5764 +  comm - MPI communicator
5765 .  m - number of local rows (Cannot be PETSC_DECIDE)
5766 .  n - This value should be the same as the local size used in creating the
5767        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5768        calculated if N is given). For square matrices n is almost always m.
5769 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5770 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5771 .   i - row indices for "diagonal" portion of matrix
5772 .   j - column indices
5773 .   a - matrix values
5774 .   oi - row indices for "off-diagonal" portion of matrix
5775 .   oj - column indices
5776 -   oa - matrix values
5777 
5778    Output Parameter:
5779 .   mat - the matrix
5780 
5781    Level: advanced
5782 
5783    Notes:
5784        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5785        must free the arrays once the matrix has been destroyed and not before.
5786 
5787        The i and j indices are 0 based
5788 
5789        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5790 
5791        This sets local rows and cannot be used to set off-processor values.
5792 
5793        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5794        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5795        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5796        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5797        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5798        communication if it is known that only local entries will be set.
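
       Example usage (a minimal sketch; i,j,a and oi,oj,oa are the caller's CSR arrays, which must remain valid until
       after the matrix is destroyed):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve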
5799 
5800 .keywords: matrix, aij, compressed row, sparse, parallel
5801 
5802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5803           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5804 @*/
5805 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5806 {
5807   PetscErrorCode ierr;
5808   Mat_MPIAIJ     *maij;
5809 
5810   PetscFunctionBegin;
5811   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5812   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5813   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5814   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5815   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5816   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5817   maij = (Mat_MPIAIJ*) (*mat)->data;
5818 
5819   (*mat)->preallocated = PETSC_TRUE;
5820 
5821   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5822   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5823 
5824   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5825   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5826 
5827   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5828   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5829   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5830   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5831 
5832   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5833   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5834   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5835   PetscFunctionReturn(0);
5836 }
5837 
5838 /*
5839     Special version for direct calls from Fortran
5840 */
5841 #include <petsc-private/fortranimpl.h>
5842 
5843 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5844 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5845 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5846 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5847 #endif
5848 
5849 /* Change these macros so they can be used in a void function */
5850 #undef CHKERRQ
5851 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5852 #undef SETERRQ2
5853 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5854 #undef SETERRQ3
5855 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5856 #undef SETERRQ
5857 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5858 
5859 #undef __FUNCT__
5860 #define __FUNCT__ "matsetvaluesmpiaij_"
5861 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5862 {
5863   Mat            mat  = *mmat;
5864   PetscInt       m    = *mm, n = *mn;
5865   InsertMode     addv = *maddv;
5866   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5867   PetscScalar    value;
5868   PetscErrorCode ierr;
5869 
5870   MatCheckPreallocated(mat,1);
5871   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5872 
5873 #if defined(PETSC_USE_DEBUG)
5874   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5875 #endif
5876   {
5877     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5878     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5879     PetscBool roworiented = aij->roworiented;
5880 
5881     /* Some Variables required in the macro */
5882     Mat        A                 = aij->A;
5883     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5884     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5885     MatScalar  *aa               = a->a;
5886     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5887     Mat        B                 = aij->B;
5888     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5889     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5890     MatScalar  *ba               = b->a;
5891 
5892     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5893     PetscInt  nonew = a->nonew;
5894     MatScalar *ap1,*ap2;
5895 
5896     PetscFunctionBegin;
5897     for (i=0; i<m; i++) {
5898       if (im[i] < 0) continue;
5899 #if defined(PETSC_USE_DEBUG)
5900       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5901 #endif
5902       if (im[i] >= rstart && im[i] < rend) {
5903         row      = im[i] - rstart;
5904         lastcol1 = -1;
5905         rp1      = aj + ai[row];
5906         ap1      = aa + ai[row];
5907         rmax1    = aimax[row];
5908         nrow1    = ailen[row];
5909         low1     = 0;
5910         high1    = nrow1;
5911         lastcol2 = -1;
5912         rp2      = bj + bi[row];
5913         ap2      = ba + bi[row];
5914         rmax2    = bimax[row];
5915         nrow2    = bilen[row];
5916         low2     = 0;
5917         high2    = nrow2;
5918 
5919         for (j=0; j<n; j++) {
5920           if (roworiented) value = v[i*n+j];
5921           else value = v[i+j*m];
5922           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5923           if (in[j] >= cstart && in[j] < cend) {
5924             col = in[j] - cstart;
5925             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5926           } else if (in[j] < 0) continue;
5927 #if defined(PETSC_USE_DEBUG)
5928           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5929 #endif
5930           else {
5931             if (mat->was_assembled) {
5932               if (!aij->colmap) {
5933                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5934               }
5935 #if defined(PETSC_USE_CTABLE)
5936               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5937               col--;
5938 #else
5939               col = aij->colmap[in[j]] - 1;
5940 #endif
5941               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5942                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5943                 col  =  in[j];
5944                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5945                 B     = aij->B;
5946                 b     = (Mat_SeqAIJ*)B->data;
5947                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5948                 rp2   = bj + bi[row];
5949                 ap2   = ba + bi[row];
5950                 rmax2 = bimax[row];
5951                 nrow2 = bilen[row];
5952                 low2  = 0;
5953                 high2 = nrow2;
5954                 bm    = aij->B->rmap->n;
5955                 ba    = b->a;
5956               }
5957             } else col = in[j];
5958             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5959           }
5960         }
5961       } else if (!aij->donotstash) {
5962         if (roworiented) {
5963           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5964         } else {
5965           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5966         }
5967       }
5968     }
5969   }
5970   PetscFunctionReturnVoid();
5971 }
5972 
5973