xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8563dfcc547dae59d5e1dc6b8e602db41cb72b14)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 
6 /*MC
7    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8 
9    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
10    and MATMPIAIJ otherwise.  As a result, for single process communicators,
11   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
12   for communicators controlling multiple processes.  It is recommended that you call both of
13   the above preallocation routines for simplicity.
14 
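   Example usage (a sketch, not taken from the PETSc examples; comm, m, n, nz, dnz, and onz are
   placeholder names and error checking is omitted):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);            /* used when comm has a single process  */
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);  /* used when comm has multiple processes */
.ve
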
15    Options Database Keys:
16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17 
18   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
19    automatically switches over to using inodes when enough exist.
20 
21   Level: beginner
22 
23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
24 M*/
25 
26 /*MC
27    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28 
29    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
30    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
31    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
32   for communicators controlling multiple processes.  It is recommended that you call both of
33   the above preallocation routines for simplicity.
34 
35    Options Database Keys:
36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37 
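   Example of selecting the type at run time (a sketch; ./ex1 stands for any program that calls MatSetFromOptions()):
.vb
     ./ex1 -mat_type aijcrl
.ve
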
38   Level: beginner
39 
40 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
41 M*/
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
46 {
47   PetscErrorCode  ierr;
48   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
49   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
50   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
51   const PetscInt  *ia,*ib;
52   const MatScalar *aa,*bb;
53   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
54   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
55 
56   PetscFunctionBegin;
57   *keptrows = 0;
58   ia        = a->i;
59   ib        = b->i;
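  /* first pass: count the local rows that contain no nonzero values, in either the diagonal (A) or the off-diagonal (B) part */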
60   for (i=0; i<m; i++) {
61     na = ia[i+1] - ia[i];
62     nb = ib[i+1] - ib[i];
63     if (!na && !nb) {
64       cnt++;
65       goto ok1;
66     }
67     aa = a->a + ia[i];
68     for (j=0; j<na; j++) {
69       if (aa[j] != 0.0) goto ok1;
70     }
71     bb = b->a + ib[i];
72     for (j=0; j <nb; j++) {
73       if (bb[j] != 0.0) goto ok1;
74     }
75     cnt++;
76 ok1:;
77   }
78   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
79   if (!n0rows) PetscFunctionReturn(0);
80   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
81   cnt  = 0;
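  /* second pass: record the global indices of the rows that contain at least one nonzero value */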
82   for (i=0; i<m; i++) {
83     na = ia[i+1] - ia[i];
84     nb = ib[i+1] - ib[i];
85     if (!na && !nb) continue;
86     aa = a->a + ia[i];
87     for (j=0; j<na;j++) {
88       if (aa[j] != 0.0) {
89         rows[cnt++] = rstart + i;
90         goto ok2;
91       }
92     }
93     bb = b->a + ib[i];
94     for (j=0; j<nb; j++) {
95       if (bb[j] != 0.0) {
96         rows[cnt++] = rstart + i;
97         goto ok2;
98       }
99     }
100 ok2:;
101   }
102   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
109 {
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
111   PetscErrorCode ierr;
112   PetscInt       i,rstart,nrows,*rows;
113 
114   PetscFunctionBegin;
115   *zrows = NULL;
116   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
117   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
118   for (i=0; i<nrows; i++) rows[i] += rstart;
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
126 {
127   PetscErrorCode ierr;
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
129   PetscInt       i,n,*garray = aij->garray;
130   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
131   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
132   PetscReal      *work;
133 
134   PetscFunctionBegin;
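  /* accumulate the contribution of the locally stored entries into a dense work array of
     global length n, then combine the partial results across processes with MPI_Allreduce() */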
135   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
136   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
137   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
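
    Call sketch (a hedged example; comm, gmat, m, and dmat are placeholder names, with gmat the
    sequential matrix significant on process 0 and m the number of rows this process is to own):
      Mat dmat;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);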
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   CHKMEMQ;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine the number of diagonal and off-diagonal nonzeros in each row */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine the number of diagonal and off-diagonal nonzeros in each row */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0 */
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   CHKMEMQ;
349   PetscFunctionReturn(0);
350 }
351 
352 /*
353   Local utility routine that creates a mapping from the global column
354 number to the local number in the off-diagonal part of the local
355 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
356 a slightly higher hash table cost; without it, it is not scalable (each process
357 holds an order N integer array, which is, however, fast to access).
358 */
359 #undef __FUNCT__
360 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
361 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
362 {
363   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
364   PetscErrorCode ierr;
365   PetscInt       n = aij->B->cmap->n,i;
366 
367   PetscFunctionBegin;
368   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
369 #if defined(PETSC_USE_CTABLE)
370   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
371   for (i=0; i<n; i++) {
372     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
373   }
374 #else
375   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
376   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
378   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
379 #endif
380   PetscFunctionReturn(0);
381 }
382 
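/*
   MatSetValues_SeqAIJ_A_Private() and the B variant below insert a single (row,col,value) into the
   diagonal (A) or off-diagonal (B) SeqAIJ block: a coarse bisection narrows the search window in the
   sorted row to a handful of entries, a linear scan then finds the column (or the insertion point),
   and MatSeqXAIJReallocateAIJ() grows the row storage when a brand new nonzero must be inserted
   (subject to the nonew and ignorezeroentries options).
*/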
383 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
384 { \
385     if (col <= lastcol1)  low1 = 0;     \
386     else                 high1 = nrow1; \
387     lastcol1 = col;\
388     while (high1-low1 > 5) { \
389       t = (low1+high1)/2; \
390       if (rp1[t] > col) high1 = t; \
391       else              low1  = t; \
392     } \
393       for (_i=low1; _i<high1; _i++) { \
394         if (rp1[_i] > col) break; \
395         if (rp1[_i] == col) { \
396           if (addv == ADD_VALUES) ap1[_i] += value;   \
397           else                    ap1[_i] = value; \
398           goto a_noinsert; \
399         } \
400       }  \
401       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
402       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
403       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
404       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
405       N = nrow1++ - 1; a->nz++; high1++; \
406       /* shift up all the later entries in this row */ \
407       for (ii=N; ii>=_i; ii--) { \
408         rp1[ii+1] = rp1[ii]; \
409         ap1[ii+1] = ap1[ii]; \
410       } \
411       rp1[_i] = col;  \
412       ap1[_i] = value;  \
413       a_noinsert: ; \
414       ailen[row] = nrow1; \
415 }
416 
417 
418 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
419   { \
420     if (col <= lastcol2) low2 = 0;                        \
421     else high2 = nrow2;                                   \
422     lastcol2 = col;                                       \
423     while (high2-low2 > 5) {                              \
424       t = (low2+high2)/2;                                 \
425       if (rp2[t] > col) high2 = t;                        \
426       else             low2  = t;                         \
427     }                                                     \
428     for (_i=low2; _i<high2; _i++) {                       \
429       if (rp2[_i] > col) break;                           \
430       if (rp2[_i] == col) {                               \
431         if (addv == ADD_VALUES) ap2[_i] += value;         \
432         else                    ap2[_i] = value;          \
433         goto b_noinsert;                                  \
434       }                                                   \
435     }                                                     \
436     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
437     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
438     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
439     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
440     N = nrow2++ - 1; b->nz++; high2++;                    \
441     /* shift up all the later entries in this row */      \
442     for (ii=N; ii>=_i; ii--) {                            \
443       rp2[ii+1] = rp2[ii];                                \
444       ap2[ii+1] = ap2[ii];                                \
445     }                                                     \
446     rp2[_i] = col;                                        \
447     ap2[_i] = value;                                      \
448     b_noinsert: ;                                         \
449     bilen[row] = nrow2;                                   \
450   }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
454 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
455 {
456   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
457   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
458   PetscErrorCode ierr;
459   PetscInt       l,*garray = mat->garray,diag;
460 
461   PetscFunctionBegin;
462   /* code only works for square matrices A */
463 
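  /* v is expected to hold the full local row in global column order:
     [ off-diagonal entries left of the diagonal block | diagonal block entries | off-diagonal entries right of the diagonal block ] */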
464   /* find size of row to the left of the diagonal part */
465   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
466   row  = row - diag;
467   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
468     if (garray[b->j[b->i[row]+l]] > diag) break;
469   }
470   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
471 
472   /* diagonal part */
473   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
474 
475   /* right of diagonal part */
476   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
477   PetscFunctionReturn(0);
478 }
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatSetValues_MPIAIJ"
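/*
   Rows owned by this process are inserted directly into the diagonal (A) or off-diagonal (B) SeqAIJ
   blocks through the macros above; entries for rows owned by other processes are stashed here and
   communicated to their owners during MatAssemblyBegin()/MatAssemblyEnd().
*/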
482 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
483 {
484   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
485   PetscScalar    value;
486   PetscErrorCode ierr;
487   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
488   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
489   PetscBool      roworiented = aij->roworiented;
490 
491   /* Some Variables required in the macro */
492   Mat        A                 = aij->A;
493   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
494   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
495   MatScalar  *aa               = a->a;
496   PetscBool  ignorezeroentries = a->ignorezeroentries;
497   Mat        B                 = aij->B;
498   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
499   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
500   MatScalar  *ba               = b->a;
501 
502   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
503   PetscInt  nonew;
504   MatScalar *ap1,*ap2;
505 
506   PetscFunctionBegin;
507   if (v) PetscValidScalarPointer(v,6);
508   for (i=0; i<m; i++) {
509     if (im[i] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
512 #endif
513     if (im[i] >= rstart && im[i] < rend) {
514       row      = im[i] - rstart;
515       lastcol1 = -1;
516       rp1      = aj + ai[row];
517       ap1      = aa + ai[row];
518       rmax1    = aimax[row];
519       nrow1    = ailen[row];
520       low1     = 0;
521       high1    = nrow1;
522       lastcol2 = -1;
523       rp2      = bj + bi[row];
524       ap2      = ba + bi[row];
525       rmax2    = bimax[row];
526       nrow2    = bilen[row];
527       low2     = 0;
528       high2    = nrow2;
529 
530       for (j=0; j<n; j++) {
531         if (v) {
532           if (roworiented) value = v[i*n+j];
533           else             value = v[i+j*m];
534         } else value = 0.0;
535         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
536         if (in[j] >= cstart && in[j] < cend) {
537           col   = in[j] - cstart;
538           nonew = a->nonew;
539           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
540         } else if (in[j] < 0) continue;
541 #if defined(PETSC_USE_DEBUG)
542         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
543 #endif
544         else {
545           if (mat->was_assembled) {
546             if (!aij->colmap) {
547               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
548             }
549 #if defined(PETSC_USE_CTABLE)
550             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
551             col--;
552 #else
553             col = aij->colmap[in[j]] - 1;
554 #endif
555             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
556               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
557               col  =  in[j];
558               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
559               B     = aij->B;
560               b     = (Mat_SeqAIJ*)B->data;
561               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
562               rp2   = bj + bi[row];
563               ap2   = ba + bi[row];
564               rmax2 = bimax[row];
565               nrow2 = bilen[row];
566               low2  = 0;
567               high2 = nrow2;
568               bm    = aij->B->rmap->n;
569               ba    = b->a;
570             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
571           } else col = in[j];
572           nonew = b->nonew;
573           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
574         }
575       }
576     } else {
577       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
578       if (!aij->donotstash) {
579         mat->assembled = PETSC_FALSE;
580         if (roworiented) {
581           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
582         } else {
583           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
584         }
585       }
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatGetValues_MPIAIJ"
593 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
594 {
595   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
596   PetscErrorCode ierr;
597   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
598   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
599 
600   PetscFunctionBegin;
601   for (i=0; i<m; i++) {
602     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
603     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
604     if (idxm[i] >= rstart && idxm[i] < rend) {
605       row = idxm[i] - rstart;
606       for (j=0; j<n; j++) {
607         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
608         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
609         if (idxn[j] >= cstart && idxn[j] < cend) {
610           col  = idxn[j] - cstart;
611           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612         } else {
613           if (!aij->colmap) {
614             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
615           }
616 #if defined(PETSC_USE_CTABLE)
617           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
618           col--;
619 #else
620           col = aij->colmap[idxn[j]] - 1;
621 #endif
622           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
623           else {
624             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
625           }
626         }
627       }
628     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
637 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
638 {
639   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
640   PetscErrorCode ierr;
641   PetscInt       nstash,reallocs;
642   InsertMode     addv;
643 
644   PetscFunctionBegin;
645   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
646 
647   /* make sure all processes are using the same insert mode (INSERT_VALUES or ADD_VALUES) */
648   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
649   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
650   mat->insertmode = addv; /* in case this processor had no cache */
651 
652   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
653   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
654   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
661 {
662   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
663   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
664   PetscErrorCode ierr;
665   PetscMPIInt    n;
666   PetscInt       i,j,rstart,ncols,flg;
667   PetscInt       *row,*col;
668   PetscBool      other_disassembled;
669   PetscScalar    *val;
670   InsertMode     addv = mat->insertmode;
671 
672   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
673 
674   PetscFunctionBegin;
675   if (!aij->donotstash && !mat->nooffprocentries) {
676     while (1) {
677       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
678       if (!flg) break;
679 
680       for (i=0; i<n; ) {
681         /* Now identify the consecutive vals belonging to the same row */
682         for (j=i,rstart=row[j]; j<n; j++) {
683           if (row[j] != rstart) break;
684         }
685         if (j < n) ncols = j-i;
686         else       ncols = n-i;
687         /* Now assemble all these values with a single function call */
688         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
689 
690         i = j;
691       }
692     }
693     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
694   }
695   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
696   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
697 
698   /* determine if any processor has disassembled, if so we must
699      also disassemble ourselves, in order that we may reassemble. */
700   /*
701      if nonzero structure of submatrix B cannot change then we know that
702      no processor disassembled thus we can skip this stuff
703   */
704   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
705     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
706     if (mat->was_assembled && !other_disassembled) {
707       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
708     }
709   }
710   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
711     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
712   }
713   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
715   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
716   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
717 
718   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
719 
720   aij->rowvalues = 0;
721 
722   /* used by MatAXPY() */
723   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
724   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
725 
726   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
727   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
733 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
734 {
735   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
740   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroRows_MPIAIJ"
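/*
   Typical user-level call that reaches this routine (a sketch; A, nrows, rows, diag, x, and b are
   placeholder names, and x and b may be NULL when no right-hand-side fix-up is wanted):
     ierr = MatZeroRows(A,nrows,rows,diag,x,b);CHKERRQ(ierr);
*/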
746 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
747 {
748   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode    ierr;
750   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
751   PetscInt          i,*owners = A->rmap->range;
752   PetscInt          *nprocs,j,idx,nsends,row;
753   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
754   PetscInt          *rvalues,count,base,slen,*source;
755   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
756   MPI_Comm          comm;
757   MPI_Request       *send_waits,*recv_waits;
758   MPI_Status        recv_status,*send_status;
759   const PetscScalar *xx;
760   PetscScalar       *bb;
761 #if defined(PETSC_DEBUG)
762   PetscBool found = PETSC_FALSE;
763 #endif
764 
765   PetscFunctionBegin;
766   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
767   /*  first count number of contributors to each processor */
768   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
769   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
770   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
771   j    = 0;
772   for (i=0; i<N; i++) {
773     if (lastidx > (idx = rows[i])) j = 0;
774     lastidx = idx;
775     for (; j<size; j++) {
776       if (idx >= owners[j] && idx < owners[j+1]) {
777         nprocs[2*j]++;
778         nprocs[2*j+1] = 1;
779         owner[i]      = j;
780 #if defined(PETSC_DEBUG)
781         found = PETSC_TRUE;
782 #endif
783         break;
784       }
785     }
786 #if defined(PETSC_DEBUG)
787     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
788     found = PETSC_FALSE;
789 #endif
790   }
791   nsends = 0;
792   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
793 
794   if (A->nooffproczerorows) {
795     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
796     nrecvs = nsends;
797     nmax   = N;
798   } else {
799     /* inform other processes of the number of messages and the maximum length */
800     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
801   }
802 
803   /* post receives:   */
804   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
805   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
806   for (i=0; i<nrecvs; i++) {
807     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
808   }
809 
810   /* do sends:
811       1) starts[i] gives the starting index in svalues for stuff going to
812          the ith processor
813   */
814   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
815   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
816   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
817 
818   starts[0] = 0;
819   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
820   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
821 
822   starts[0] = 0;
823   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
824   count = 0;
825   for (i=0; i<size; i++) {
826     if (nprocs[2*i+1]) {
827       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
828     }
829   }
830   ierr = PetscFree(starts);CHKERRQ(ierr);
831 
832   base = owners[rank];
833 
834   /*  wait on receives */
835   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
836   count = nrecvs; slen = 0;
837   while (count) {
838     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
839     /* unpack receives into our local space */
840     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
841 
842     source[imdex] = recv_status.MPI_SOURCE;
843     lens[imdex]   = n;
844     slen         += n;
845     count--;
846   }
847   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
848 
849   /* move the data into the send scatter */
850   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
851   count = 0;
852   for (i=0; i<nrecvs; i++) {
853     values = rvalues + i*nmax;
854     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
855   }
856   ierr = PetscFree(rvalues);CHKERRQ(ierr);
857   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
858   ierr = PetscFree(owner);CHKERRQ(ierr);
859   ierr = PetscFree(nprocs);CHKERRQ(ierr);
860 
861   /* fix right hand side if needed */
862   if (x && b) {
863     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
864     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
865     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
866     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
867     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
868   }
869   /*
870         Zero the required rows. If the "diagonal block" of the matrix
871      is square and the user wishes to set the diagonal we use separate
872      code so that MatSetValues() is not called once per diagonal entry, which
873      would allocate new memory each time (many mallocs) and slow things down.
874 
875   */
876   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
877   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
879     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
880   } else if (diag != 0.0) {
881     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
882     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
883     for (i = 0; i < slen; i++) {
884       row  = lrows[i] + rstart;
885       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
886     }
887     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   } else {
890     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
891   }
892   ierr = PetscFree(lrows);CHKERRQ(ierr);
893 
894   /* wait on sends */
895   if (nsends) {
896     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
897     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
898     ierr = PetscFree(send_status);CHKERRQ(ierr);
899   }
900   ierr = PetscFree(send_waits);CHKERRQ(ierr);
901   ierr = PetscFree(svalues);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
908 {
909   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
910   PetscErrorCode    ierr;
911   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
912   PetscInt          i,*owners = A->rmap->range;
913   PetscInt          *nprocs,j,idx,nsends;
914   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
915   PetscInt          *rvalues,count,base,slen,*source;
916   PetscInt          *lens,*lrows,*values,m;
917   MPI_Comm          comm;
918   MPI_Request       *send_waits,*recv_waits;
919   MPI_Status        recv_status,*send_status;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 #if defined(PETSC_DEBUG)
927   PetscBool found = PETSC_FALSE;
928 #endif
929 
930   PetscFunctionBegin;
931   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
932   /*  first count number of contributors to each processor */
933   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
934   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
935   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
936   j    = 0;
937   for (i=0; i<N; i++) {
938     if (lastidx > (idx = rows[i])) j = 0;
939     lastidx = idx;
940     for (; j<size; j++) {
941       if (idx >= owners[j] && idx < owners[j+1]) {
942         nprocs[2*j]++;
943         nprocs[2*j+1] = 1;
944         owner[i]      = j;
945 #if defined(PETSC_DEBUG)
946         found = PETSC_TRUE;
947 #endif
948         break;
949       }
950     }
951 #if defined(PETSC_DEBUG)
952     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
953     found = PETSC_FALSE;
954 #endif
955   }
956   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
957 
958   /* inform other processes of the number of messages and the maximum length */
959   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
960 
961   /* post receives:   */
962   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
963   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
964   for (i=0; i<nrecvs; i++) {
965     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
966   }
967 
968   /* do sends:
969       1) starts[i] gives the starting index in svalues for stuff going to
970          the ith processor
971   */
972   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
973   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
974   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
975 
976   starts[0] = 0;
977   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
978   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
979 
980   starts[0] = 0;
981   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
982   count = 0;
983   for (i=0; i<size; i++) {
984     if (nprocs[2*i+1]) {
985       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
986     }
987   }
988   ierr = PetscFree(starts);CHKERRQ(ierr);
989 
990   base = owners[rank];
991 
992   /*  wait on receives */
993   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
994   count = nrecvs; slen = 0;
995   while (count) {
996     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
997     /* unpack receives into our local space */
998     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
999 
1000     source[imdex] = recv_status.MPI_SOURCE;
1001     lens[imdex]   = n;
1002     slen         += n;
1003     count--;
1004   }
1005   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1006 
1007   /* move the data into the send scatter */
1008   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1009   count = 0;
1010   for (i=0; i<nrecvs; i++) {
1011     values = rvalues + i*nmax;
1012     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1013   }
1014   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1015   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1016   ierr = PetscFree(owner);CHKERRQ(ierr);
1017   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1018   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1019 
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1022 
1023   /* handle off diagonal part of matrix */
1024   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1025   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1026   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1027   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1028   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1029   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1032   if (x) {
1033     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1036     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1037   }
1038   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1039 
1040   /* remove zeroed rows of off diagonal matrix */
1041   ii = aij->i;
1042   for (i=0; i<slen; i++) {
1043     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1044   }
1045 
1046   /* loop over all elements of off process part of matrix zeroing removed columns*/
1047   if (aij->compressedrow.use) {
1048     m    = aij->compressedrow.nrows;
1049     ii   = aij->compressedrow.i;
1050     ridx = aij->compressedrow.rindex;
1051     for (i=0; i<m; i++) {
1052       n  = ii[i+1] - ii[i];
1053       aj = aij->j + ii[i];
1054       aa = aij->a + ii[i];
1055 
1056       for (j=0; j<n; j++) {
1057         if (PetscAbsScalar(mask[*aj])) {
1058           if (b) bb[*ridx] -= *aa*xx[*aj];
1059           *aa = 0.0;
1060         }
1061         aa++;
1062         aj++;
1063       }
1064       ridx++;
1065     }
1066   } else { /* do not use compressed row format */
1067     m = l->B->rmap->n;
1068     for (i=0; i<m; i++) {
1069       n  = ii[i+1] - ii[i];
1070       aj = aij->j + ii[i];
1071       aa = aij->a + ii[i];
1072       for (j=0; j<n; j++) {
1073         if (PetscAbsScalar(mask[*aj])) {
1074           if (b) bb[i] -= *aa*xx[*aj];
1075           *aa = 0.0;
1076         }
1077         aa++;
1078         aj++;
1079       }
1080     }
1081   }
1082   if (x) {
1083     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1084     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1085   }
1086   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1087   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1088   ierr = PetscFree(lrows);CHKERRQ(ierr);
1089 
1090   /* wait on sends */
1091   if (nsends) {
1092     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1093     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1094     ierr = PetscFree(send_status);CHKERRQ(ierr);
1095   }
1096   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1097   ierr = PetscFree(svalues);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMult_MPIAIJ"
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108 
1109   PetscFunctionBegin;
1110   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1111   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
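  /* overlap communication with computation: start gathering the needed off-process entries of xx
     into lvec, multiply by the local diagonal block while the messages are in flight, then finish
     the scatter and add in the off-diagonal block contribution */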
1112   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1114   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1133 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1134 {
1135   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1148 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1149 {
1150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1151   PetscErrorCode ierr;
1152   PetscBool      merged;
1153 
1154   PetscFunctionBegin;
1155   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1156   /* do nondiagonal part */
1157   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1158   if (!merged) {
1159     /* send it on its way */
1160     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161     /* do local part */
1162     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1163     /* receive remote parts: note this assumes the values are not actually */
1164     /* added into yy until the VecScatterEnd() on the next line */
1165     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1166   } else {
1167     /* do local part */
1168     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1169     /* send it on its way */
1170     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171     /* values actually were received in the Begin() but we need to call this nop */
1172     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190   /* Easy test: symmetric diagonal block */
1191   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1192   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1193   if (!*f) PetscFunctionReturn(0);
1194   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1196   if (size == 1) PetscFunctionReturn(0);
1197 
1198   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1199   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1200   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1201   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1202   for (i=0; i<first; i++) notme[i] = i;
1203   for (i=last; i<M; i++) notme[i-last+first] = i;
1204   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1205   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1206   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1207   Aoff = Aoffs[0];
1208   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1209   Boff = Boffs[0];
1210   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1215   ierr = PetscFree(notme);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   /* do nondiagonal part */
1228   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1229   /* send it on its way */
1230   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   /* do local part */
1232   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1233   /* receive remote parts */
1234   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 /*
1239   This only works correctly for square matrices where the subblock A->A is the
1240    diagonal block
1241 */
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1245 {
1246   PetscErrorCode ierr;
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248 
1249   PetscFunctionBegin;
1250   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1251   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1252   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatScale_MPIAIJ"
1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1259 {
1260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1265   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIAIJ"
1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1272 {
1273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1294   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1295 
1296   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1315   PetscErrorCode ierr;
1316   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1317   int            fd;
1318   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1319   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1320   PetscScalar    *column_values;
1321   PetscInt       message_count,flowcontrolcount;
1322   FILE           *file;
1323 
1324   PetscFunctionBegin;
1325   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1326   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1327   nz   = A->nz + B->nz;
1328   if (!rank) {
1329     header[0] = MAT_FILE_CLASSID;
1330     header[1] = mat->rmap->N;
1331     header[2] = mat->cmap->N;
1332 
1333     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1335     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     /* get largest number of rows any processor has */
1337     rlen  = mat->rmap->n;
1338     range = mat->rmap->range;
1339     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1340   } else {
1341     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     rlen = mat->rmap->n;
1343   }
1344 
1345   /* load up the local row counts */
1346   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1347   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1348 
1349   /* store the row lengths to the file */
1350   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1351   if (!rank) {
1352     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     for (i=1; i<size; i++) {
1354       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1355       rlen = range[i+1] - range[i];
1356       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1360   } else {
1361     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1362     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1363     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1366 
1367   /* load up the local column indices */
1368   nzmax = nz; /* each processor needs as much space as the largest processor needs */
1369   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1370   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1371   cnt   = 0;
1372   for (i=0; i<mat->rmap->n; i++) {
1373     for (j=B->i[i]; j<B->i[i+1]; j++) {
1374       if ((col = garray[B->j[j]]) > cstart) break;
1375       column_indices[cnt++] = col;
1376     }
1377     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1378     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1379   }
1380   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1381 
1382   /* store the column indices to the file */
1383   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1384   if (!rank) {
1385     MPI_Status status;
1386     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1387     for (i=1; i<size; i++) {
1388       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1389       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1390       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1391       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1393     }
1394     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1395   } else {
1396     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1397     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1399     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1400   }
1401   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1402 
1403   /* load up the local column values */
1404   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1405   cnt  = 0;
1406   for (i=0; i<mat->rmap->n; i++) {
1407     for (j=B->i[i]; j<B->i[i+1]; j++) {
1408       if (garray[B->j[j]] > cstart) break;
1409       column_values[cnt++] = B->a[j];
1410     }
1411     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1412     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1413   }
1414   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1415 
1416   /* store the column values to the file */
1417   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1418   if (!rank) {
1419     MPI_Status status;
1420     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1421     for (i=1; i<size; i++) {
1422       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1423       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1424       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1425       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1426       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1427     }
1428     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1429   } else {
1430     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1431     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1433     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1434   }
1435   ierr = PetscFree(column_values);CHKERRQ(ierr);
1436 
1437   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1438   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1439   PetscFunctionReturn(0);
1440 }
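
/*
   Illustrative sketch (the file name "A.dat" is only an example): writing an assembled
   MPIAIJ matrix through the binary path implemented above, then reading it back with
   MatLoad().

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/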
1441 
1442 #include <petscdraw.h>
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1445 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1446 {
1447   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1448   PetscErrorCode    ierr;
1449   PetscMPIInt       rank = aij->rank,size = aij->size;
1450   PetscBool         isdraw,iascii,isbinary;
1451   PetscViewer       sviewer;
1452   PetscViewerFormat format;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1457   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1458   if (iascii) {
1459     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1460     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1461       MatInfo   info;
1462       PetscBool inodes;
1463 
1464       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1465       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1466       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1468       if (!inodes) {
1469         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1470                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1471       } else {
1472         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1473                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1474       }
1475       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1477       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1478       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1479       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1481       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1482       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1483       PetscFunctionReturn(0);
1484     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1485       PetscInt inodecount,inodelimit,*inodes;
1486       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1487       if (inodes) {
1488         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1489       } else {
1490         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1491       }
1492       PetscFunctionReturn(0);
1493     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1494       PetscFunctionReturn(0);
1495     }
1496   } else if (isbinary) {
1497     if (size == 1) {
1498       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500     } else {
1501       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1502     }
1503     PetscFunctionReturn(0);
1504   } else if (isdraw) {
1505     PetscDraw draw;
1506     PetscBool isnull;
1507     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1508     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1509   }
1510 
1511   if (size == 1) {
1512     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1513     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1514   } else {
1515     /* assemble the entire matrix onto the first process. */
1516     Mat        A;
1517     Mat_SeqAIJ *Aloc;
1518     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1519     MatScalar  *a;
1520 
1521     if (mat->rmap->N > 1024) {
1522       PetscBool flg = PETSC_FALSE;
1523 
1524       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1525       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1526     }
1527 
1528     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1529     if (!rank) {
1530       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1531     } else {
1532       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1533     }
1534     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1535     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1536     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1537     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1538     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1539 
1540     /* copy over the A part */
1541     Aloc = (Mat_SeqAIJ*)aij->A->data;
1542     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1545     for (i=0; i<m; i++) {
1546       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1547       row++;
1548       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1549     }
1550     aj = Aloc->j;
1551     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1552 
1553     /* copy over the B part */
1554     Aloc = (Mat_SeqAIJ*)aij->B->data;
1555     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1556     row  = mat->rmap->rstart;
1557     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1558     ct   = cols;
1559     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1560     for (i=0; i<m; i++) {
1561       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1562       row++;
1563       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1564     }
1565     ierr = PetscFree(ct);CHKERRQ(ierr);
1566     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1568     /*
1569        Every process must participate in the call that draws the matrix, since the graphics
1570        waits are synchronized across all processes that share the PetscDraw object
1571     */
1572     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     if (!rank) {
1574       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1575       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1576       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1577       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1578     }
1579     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1580     ierr = MatDestroy(&A);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
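
/*
   Illustrative sketch: requesting the detailed ASCII summary branch above on stdout.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The equivalent output can usually be requested from the command line with a -mat_view
   option (the exact option syntax differs between PETSc versions).
*/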
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatView_MPIAIJ"
1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1588 {
1589   PetscErrorCode ierr;
1590   PetscBool      iascii,isdraw,issocket,isbinary;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1596   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1597   if (iascii || isdraw || isbinary || issocket) {
1598     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatSOR_MPIAIJ"
1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1606 {
1607   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1608   PetscErrorCode ierr;
1609   Vec            bb1 = 0;
1610   PetscBool      hasop;
1611 
1612   PetscFunctionBegin;
1613   if (flag == SOR_APPLY_UPPER) {
1614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1615     PetscFunctionReturn(0);
1616   }
1617 
1618   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* ~flag & bit: true when the SOR_ZERO_INITIAL_GUESS bit is NOT set */
1619     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1620   }
1621 
1622   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627 
1628     while (its--) {
1629       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1631 
1632       /* update rhs: bb1 = bb - B*x */
1633       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1634       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1635 
1636       /* local sweep */
1637       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1638     }
1639   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644     while (its--) {
1645       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647 
1648       /* update rhs: bb1 = bb - B*x */
1649       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1650       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1651 
1652       /* local sweep */
1653       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1654     }
1655   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1656     if (flag & SOR_ZERO_INITIAL_GUESS) {
1657       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1658       its--;
1659     }
1660     while (its--) {
1661       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663 
1664       /* update rhs: bb1 = bb - B*x */
1665       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1666       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1667 
1668       /* local sweep */
1669       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1670     }
1671   } else if (flag & SOR_EISENSTAT) {
1672     Vec xx1;
1673 
1674     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1675     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1676 
1677     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     if (!mat->diag) {
1680       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1681       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1682     }
1683     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1684     if (hasop) {
1685       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1686     } else {
1687       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1688     }
1689     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1690 
1691     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1692 
1693     /* local sweep */
1694     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1695     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1696     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1697   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1698 
1699   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
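
/*
   Illustrative sketch: the parallel "SOR" above is process-local, i.e. block Jacobi with a
   SOR sweep on each process's diagonal block A and an off-diagonal right-hand-side update.
   A direct call that exercises the SOR_LOCAL_SYMMETRIC_SWEEP branch might look like

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   with omega = 1.0, fshift = 0.0, one outer iteration and one local iteration. In practice
   this routine is normally reached through PCSOR rather than called directly.
*/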
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatPermute_MPIAIJ"
1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1706 {
1707   Mat            aA,aB,Aperm;
1708   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1709   PetscScalar    *aa,*ba;
1710   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1711   PetscSF        rowsf,sf;
1712   IS             parcolp = NULL;
1713   PetscBool      done;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1718   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1719   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1720   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1721 
1722   /* Invert row permutation to find out where my rows should go */
1723   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1724   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1725   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1726   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1727   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1728   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1729 
1730   /* Invert column permutation to find out where my columns should go */
1731   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1732   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1733   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1734   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1735   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1737   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1738 
1739   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1740   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1741   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1742 
1743   /* Find out where my gcols should go */
1744   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1745   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1746   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1747   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1748   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1749   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1751   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1752 
1753   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1754   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1757   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1758   for (i=0; i<m; i++) {
1759     PetscInt row = rdest[i],rowner;
1760     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1761     for (j=ai[i]; j<ai[i+1]; j++) {
1762       PetscInt cowner,col = cdest[aj[j]];
1763       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1764       if (rowner == cowner) dnnz[i]++;
1765       else onnz[i]++;
1766     }
1767     for (j=bi[i]; j<bi[i+1]; j++) {
1768       PetscInt cowner,col = gcdest[bj[j]];
1769       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1770       if (rowner == cowner) dnnz[i]++;
1771       else onnz[i]++;
1772     }
1773   }
1774   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1776   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1780   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1781 
1782   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1784   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1785   for (i=0; i<m; i++) {
1786     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1787     PetscInt rowlen;
1788     rowlen = ai[i+1] - ai[i];
1789     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1790     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1791     rowlen = bi[i+1] - bi[i];
1792     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1793     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1794   }
1795   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1798   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1800   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1801   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1802   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1803   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1804   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1805   *B = Aperm;
1806   PetscFunctionReturn(0);
1807 }
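
/*
   Illustrative sketch (hypothetical index sets): MatPermute() takes parallel ISs holding the
   global row and column numbers of the permuted matrix. The identity permutation built with
   ISCreateStride() below simply reproduces A, but shows the calling sequence.

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/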
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1812 {
1813   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1814   Mat            A    = mat->A,B = mat->B;
1815   PetscErrorCode ierr;
1816   PetscReal      isend[5],irecv[5];
1817 
1818   PetscFunctionBegin;
1819   info->block_size = 1.0;
1820   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1821 
1822   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1823   isend[3] = info->memory;  isend[4] = info->mallocs;
1824 
1825   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1826 
1827   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1828   isend[3] += info->memory;  isend[4] += info->mallocs;
1829   if (flag == MAT_LOCAL) {
1830     info->nz_used      = isend[0];
1831     info->nz_allocated = isend[1];
1832     info->nz_unneeded  = isend[2];
1833     info->memory       = isend[3];
1834     info->mallocs      = isend[4];
1835   } else if (flag == MAT_GLOBAL_MAX) {
1836     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1837 
1838     info->nz_used      = irecv[0];
1839     info->nz_allocated = irecv[1];
1840     info->nz_unneeded  = irecv[2];
1841     info->memory       = irecv[3];
1842     info->mallocs      = irecv[4];
1843   } else if (flag == MAT_GLOBAL_SUM) {
1844     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1845 
1846     info->nz_used      = irecv[0];
1847     info->nz_allocated = irecv[1];
1848     info->nz_unneeded  = irecv[2];
1849     info->memory       = irecv[3];
1850     info->mallocs      = irecv[4];
1851   }
1852   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1853   info->fill_ratio_needed = 0;
1854   info->factor_mallocs    = 0;
1855   PetscFunctionReturn(0);
1856 }
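
/*
   Illustrative sketch: querying global storage statistics through the routine above.
   MatInfo fields are PetscLogDouble, hence the casts when printing.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
*/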
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatSetOption_MPIAIJ"
1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1861 {
1862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   switch (op) {
1867   case MAT_NEW_NONZERO_LOCATIONS:
1868   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1869   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1870   case MAT_KEEP_NONZERO_PATTERN:
1871   case MAT_NEW_NONZERO_LOCATION_ERR:
1872   case MAT_USE_INODES:
1873   case MAT_IGNORE_ZERO_ENTRIES:
1874     MatCheckPreallocated(A,1);
1875     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1876     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1877     break;
1878   case MAT_ROW_ORIENTED:
1879     a->roworiented = flg;
1880 
1881     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1882     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1883     break;
1884   case MAT_NEW_DIAGONALS:
1885     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1886     break;
1887   case MAT_IGNORE_OFF_PROC_ENTRIES:
1888     a->donotstash = flg;
1889     break;
1890   case MAT_SPD:
1891     A->spd_set = PETSC_TRUE;
1892     A->spd     = flg;
1893     if (flg) {
1894       A->symmetric                  = PETSC_TRUE;
1895       A->structurally_symmetric     = PETSC_TRUE;
1896       A->symmetric_set              = PETSC_TRUE;
1897       A->structurally_symmetric_set = PETSC_TRUE;
1898     }
1899     break;
1900   case MAT_SYMMETRIC:
1901     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1902     break;
1903   case MAT_STRUCTURALLY_SYMMETRIC:
1904     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1905     break;
1906   case MAT_HERMITIAN:
1907     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1908     break;
1909   case MAT_SYMMETRY_ETERNAL:
1910     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1911     break;
1912   default:
1913     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1914   }
1915   PetscFunctionReturn(0);
1916 }
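
/*
   Illustrative sketch: options handled by the switch above are typically set after the type
   and preallocation have been established and before assembly, e.g. to drop off-process
   contributions instead of stashing them and to error on unpreallocated insertions:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
*/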
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatGetRow_MPIAIJ"
1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1921 {
1922   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1923   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1924   PetscErrorCode ierr;
1925   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1926   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1927   PetscInt       *cmap,*idx_p;
1928 
1929   PetscFunctionBegin;
1930   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1931   mat->getrowactive = PETSC_TRUE;
1932 
1933   if (!mat->rowvalues && (idx || v)) {
1934     /*
1935         allocate enough space to hold information from the longest row.
1936     */
1937     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1938     PetscInt   max = 1,tmp;
1939     for (i=0; i<matin->rmap->n; i++) {
1940       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1941       if (max < tmp) max = tmp;
1942     }
1943     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1944   }
1945 
1946   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1947   lrow = row - rstart;
1948 
1949   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1950   if (!v)   {pvA = 0; pvB = 0;}
1951   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1952   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1953   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1954   nztot = nzA + nzB;
1955 
1956   cmap = mat->garray;
1957   if (v  || idx) {
1958     if (nztot) {
1959       /* Sort by increasing column numbers, assuming A and B already sorted */
1960       PetscInt imark = -1;
1961       if (v) {
1962         *v = v_p = mat->rowvalues;
1963         for (i=0; i<nzB; i++) {
1964           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1965           else break;
1966         }
1967         imark = i;
1968         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1969         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1970       }
1971       if (idx) {
1972         *idx = idx_p = mat->rowindices;
1973         if (imark > -1) {
1974           for (i=0; i<imark; i++) {
1975             idx_p[i] = cmap[cworkB[i]];
1976           }
1977         } else {
1978           for (i=0; i<nzB; i++) {
1979             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1980             else break;
1981           }
1982           imark = i;
1983         }
1984         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1985         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1986       }
1987     } else {
1988       if (idx) *idx = 0;
1989       if (v)   *v   = 0;
1990     }
1991   }
1992   *nz  = nztot;
1993   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1994   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2001 {
2002   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2003 
2004   PetscFunctionBegin;
2005   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2006   aij->getrowactive = PETSC_FALSE;
2007   PetscFunctionReturn(0);
2008 }
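
/*
   Illustrative sketch: MatGetRow()/MatRestoreRow() may only be asked for locally owned rows
   (see the "Only local rows" check above), and each MatGetRow() must be matched by a
   MatRestoreRow() before the next row is requested.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[], vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/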
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatNorm_MPIAIJ"
2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2013 {
2014   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2015   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2016   PetscErrorCode ierr;
2017   PetscInt       i,j,cstart = mat->cmap->rstart;
2018   PetscReal      sum = 0.0;
2019   MatScalar      *v;
2020 
2021   PetscFunctionBegin;
2022   if (aij->size == 1) {
2023     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2024   } else {
2025     if (type == NORM_FROBENIUS) {
2026       v = amat->a;
2027       for (i=0; i<amat->nz; i++) {
2028         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2029       }
2030       v = bmat->a;
2031       for (i=0; i<bmat->nz; i++) {
2032         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2033       }
2034       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2035       *norm = PetscSqrtReal(*norm);
2036     } else if (type == NORM_1) { /* max column norm */
2037       PetscReal *tmp,*tmp2;
2038       PetscInt  *jj,*garray = aij->garray;
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2040       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2041       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2042       *norm = 0.0;
2043       v     = amat->a; jj = amat->j;
2044       for (j=0; j<amat->nz; j++) {
2045         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2046       }
2047       v = bmat->a; jj = bmat->j;
2048       for (j=0; j<bmat->nz; j++) {
2049         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2050       }
2051       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2052       for (j=0; j<mat->cmap->N; j++) {
2053         if (tmp2[j] > *norm) *norm = tmp2[j];
2054       }
2055       ierr = PetscFree(tmp);CHKERRQ(ierr);
2056       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2057     } else if (type == NORM_INFINITY) { /* max row norm */
2058       PetscReal ntemp = 0.0;
2059       for (j=0; j<aij->A->rmap->n; j++) {
2060         v   = amat->a + amat->i[j];
2061         sum = 0.0;
2062         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2063           sum += PetscAbsScalar(*v); v++;
2064         }
2065         v = bmat->a + bmat->i[j];
2066         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2067           sum += PetscAbsScalar(*v); v++;
2068         }
2069         if (sum > ntemp) ntemp = sum;
2070       }
2071       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2072     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2073   }
2074   PetscFunctionReturn(0);
2075 }
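
/*
   Illustrative sketch: the three norms supported by the routine above (NORM_2 is rejected
   for MPIAIJ).

     PetscReal nrm1,nrmf,nrmi;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          largest column sum of |a_ij|
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);  square root of the sum of |a_ij|^2
     ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);   largest row sum of |a_ij|
*/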
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatTranspose_MPIAIJ"
2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2080 {
2081   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2082   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2083   PetscErrorCode ierr;
2084   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2085   PetscInt       cstart = A->cmap->rstart,ncol;
2086   Mat            B;
2087   MatScalar      *array;
2088 
2089   PetscFunctionBegin;
2090   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2091 
2092   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2093   ai = Aloc->i; aj = Aloc->j;
2094   bi = Bloc->i; bj = Bloc->j;
2095   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2096     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2097     PetscSFNode          *oloc;
2098     PETSC_UNUSED PetscSF sf;
2099 
2100     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2101     /* compute d_nnz for preallocation */
2102     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2103     for (i=0; i<ai[ma]; i++) {
2104       d_nnz[aj[i]]++;
2105       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2106     }
2107     /* compute local off-diagonal contributions */
2108     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2109     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2110     /* map those to global */
2111     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2112     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2113     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2114     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2115     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2117     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2118 
2119     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2120     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2121     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2122     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2123     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2124     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2125   } else {
2126     B    = *matout;
2127     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2128     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2129   }
2130 
2131   /* copy over the A part */
2132   array = Aloc->a;
2133   row   = A->rmap->rstart;
2134   for (i=0; i<ma; i++) {
2135     ncol = ai[i+1]-ai[i];
2136     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2137     row++;
2138     array += ncol; aj += ncol;
2139   }
2140   aj = Aloc->j;
2141   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2142 
2143   /* copy over the B part */
2144   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2145   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2146   array = Bloc->a;
2147   row   = A->rmap->rstart;
2148   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2149   cols_tmp = cols;
2150   for (i=0; i<mb; i++) {
2151     ncol = bi[i+1]-bi[i];
2152     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2153     row++;
2154     array += ncol; cols_tmp += ncol;
2155   }
2156   ierr = PetscFree(cols);CHKERRQ(ierr);
2157 
2158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2161     *matout = B;
2162   } else {
2163     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
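
/*
   Illustrative sketch: out-of-place and in-place use of the transpose above. The in-place
   form requires a square matrix, matching the check at the top of the routine.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix At = A^T
     ierr = MatDestroy(&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      replaces A by A^T (square only)
*/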
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2171 {
2172   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2173   Mat            a    = aij->A,b = aij->B;
2174   PetscErrorCode ierr;
2175   PetscInt       s1,s2,s3;
2176 
2177   PetscFunctionBegin;
2178   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2179   if (rr) {
2180     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2181     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2182     /* Overlap communication with computation. */
2183     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2184   }
2185   if (ll) {
2186     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2187     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2188     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2189   }
2190   /* scale the diagonal block */
2191   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2192 
2193   if (rr) {
2194     /* Do a scatter end and then right scale the off-diagonal block */
2195     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2196     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2197   }
2198   PetscFunctionReturn(0);
2199 }
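
/*
   Illustrative sketch: forming diag(l)*A*diag(r). The left vector must conform to A's rows
   and the right vector to A's columns, matching the size checks above; MatGetVecs() produces
   both with the correct layouts.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/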
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatEqual_MPIAIJ"
2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2216 {
2217   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2218   Mat            a,b,c,d;
2219   PetscBool      flg;
2220   PetscErrorCode ierr;
2221 
2222   PetscFunctionBegin;
2223   a = matA->A; b = matA->B;
2224   c = matB->A; d = matB->B;
2225 
2226   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2227   if (flg) {
2228     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2229   }
2230   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatCopy_MPIAIJ"
2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2237 {
2238   PetscErrorCode ierr;
2239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2240   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2241 
2242   PetscFunctionBegin;
2243   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2244   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2245     /* Because of the column compression in the off-process part of the matrix a->B,
2246        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2247        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2248        could be provided by first uncompressing the a->B matrices and then copying the
2249        submatrices */
2250     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2251   } else {
2252     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2253     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2254   }
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatSetUp_MPIAIJ"
2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2261 {
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2273 {
2274   PetscInt       i,m=Y->rmap->N;
2275   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2276   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2277   const PetscInt *xi = x->i,*yi = y->i;
2278 
2279   PetscFunctionBegin;
2280   /* Set the number of nonzeros in the new matrix */
2281   for (i=0; i<m; i++) {
2282     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2283     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2284     nnz[i] = 0;
2285     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2286       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2287       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2288       nnz[i]++;
2289     }
2290     for (; k<nzy; k++) nnz[i]++;
2291   }
2292   PetscFunctionReturn(0);
2293 }
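
/*
   Worked example of the count above for one row, in terms of global column indices: if Y's
   row holds columns {1,3,7} and X's row holds {3,5}, the merged pattern is {1,3,5,7}, so
   nnz for that row is 4 (the duplicate column 3 is counted only once).
*/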
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatAXPY_MPIAIJ"
2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2298 {
2299   PetscErrorCode ierr;
2300   PetscInt       i;
2301   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2302   PetscBLASInt   bnz,one=1;
2303   Mat_SeqAIJ     *x,*y;
2304 
2305   PetscFunctionBegin;
2306   if (str == SAME_NONZERO_PATTERN) {
2307     PetscScalar alpha = a;
2308     x    = (Mat_SeqAIJ*)xx->A->data;
2309     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2310     y    = (Mat_SeqAIJ*)yy->A->data;
2311     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2312     x    = (Mat_SeqAIJ*)xx->B->data;
2313     y    = (Mat_SeqAIJ*)yy->B->data;
2314     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2315     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2316   } else if (str == SUBSET_NONZERO_PATTERN) {
2317     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2318 
2319     x = (Mat_SeqAIJ*)xx->B->data;
2320     y = (Mat_SeqAIJ*)yy->B->data;
2321     if (y->xtoy && y->XtoY != xx->B) {
2322       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2323       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2324     }
2325     if (!y->xtoy) { /* get xtoy */
2326       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2327       y->XtoY = xx->B;
2328       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2329     }
2330     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2331   } else {
2332     Mat      B;
2333     PetscInt *nnz_d,*nnz_o;
2334     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2335     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2336     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2337     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2338     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2339     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2340     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2342     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2343     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2344     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2345     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2347     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2348   }
2349   PetscFunctionReturn(0);
2350 }
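
/*
   Illustrative sketch: Y <- a*X + Y through the routine above. With SAME_NONZERO_PATTERN the
   update reduces to a pair of BLAS axpy calls on the stored values; with
   DIFFERENT_NONZERO_PATTERN a new matrix with the merged pattern is preallocated and Y is
   replaced in place.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/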
2351 
2352 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatConjugate_MPIAIJ"
2356 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2357 {
2358 #if defined(PETSC_USE_COMPLEX)
2359   PetscErrorCode ierr;
2360   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2361 
2362   PetscFunctionBegin;
2363   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2364   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2365 #else
2366   PetscFunctionBegin;
2367 #endif
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatRealPart_MPIAIJ"
2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2374 {
2375   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2380   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2387 {
2388   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2393   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #if defined(PETSC_HAVE_PBGL)
2398 
2399 #include <boost/parallel/mpi/bsp_process_group.hpp>
2400 #include <boost/graph/distributed/ilu_default_graph.hpp>
2401 #include <boost/graph/distributed/ilu_0_block.hpp>
2402 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2403 #include <boost/graph/distributed/petsc/interface.hpp>
2404 #include <boost/multi_array.hpp>
2405 #include <boost/parallel/distributed_property_map.hpp>
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2409 /*
2410   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2411 */
2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2413 {
2414   namespace petsc = boost::distributed::petsc;
2415 
2416   namespace graph_dist = boost::graph::distributed;
2417   using boost::graph::distributed::ilu_default::process_group_type;
2418   using boost::graph::ilu_permuted;
2419 
2420   PetscBool      row_identity, col_identity;
2421   PetscContainer c;
2422   PetscInt       m, n, M, N;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2427   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2428   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2429   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2430 
2431   process_group_type pg;
2432   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2433   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2434   lgraph_type& level_graph = *lgraph_p;
2435   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2436 
2437   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2438   ilu_permuted(level_graph);
2439 
2440   /* put together the new matrix */
2441   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2442   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2443   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2444   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2445   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2446   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2447   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449 
2450   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2451   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2452   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2453   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2460 {
2461   PetscFunctionBegin;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSolve_MPIAIJ"
2467 /*
2468   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2469 */
2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2471 {
2472   namespace graph_dist = boost::graph::distributed;
2473 
2474   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2475   lgraph_type    *lgraph_p;
2476   PetscContainer c;
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2481   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2482   ierr = VecCopy(b, x);CHKERRQ(ierr);
2483 
2484   PetscScalar *array_x;
2485   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2486   PetscInt sx;
2487   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2488 
2489   PetscScalar *array_b;
2490   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2491   PetscInt sb;
2492   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2493 
2494   lgraph_type& level_graph = *lgraph_p;
2495   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2496 
2497   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2498   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2499   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2500 
2501   typedef boost::iterator_property_map<array_ref_type::iterator,
2502                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2503   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2504   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2505 
2506   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2507   PetscFunctionReturn(0);
2508 }
2509 #endif
2510 
2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2512   PetscInt    nzlocal,nsends,nrecvs;
2513   PetscMPIInt *send_rank,*recv_rank;
2514   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2515   PetscScalar *sbuf_a,**rbuf_a;
2516   PetscErrorCode (*Destroy)(Mat);
2517 } Mat_Redundant;
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2521 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2522 {
2523   PetscErrorCode ierr;
2524   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2525   PetscInt       i;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2530   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2531   for (i=0; i<redund->nrecvs; i++) {
2532     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2533     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2534   }
2535   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2536   ierr = PetscFree(redund);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatDestroy_MatRedundant"
2542 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2543 {
2544   PetscErrorCode ierr;
2545   PetscContainer container;
2546   Mat_Redundant  *redund=NULL;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2550   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2551   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2552 
2553   A->ops->destroy = redund->Destroy;
2554 
2555   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2556   if (A->ops->destroy) {
2557     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2565 {
2566   PetscMPIInt    rank,size;
2567   MPI_Comm       comm;
2568   PetscErrorCode ierr;
2569   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2570   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2571   PetscInt       *rowrange = mat->rmap->range;
2572   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2573   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2574   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2575   PetscScalar    *sbuf_a;
2576   PetscInt       nzlocal=a->nz+b->nz;
2577   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2578   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2579   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2580   MatScalar      *aworkA,*aworkB;
2581   PetscScalar    *vals;
2582   PetscMPIInt    tag1,tag2,tag3,imdex;
2583   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2584   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2585   MPI_Status     recv_status,*send_status;
2586   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2587   PetscInt       **rbuf_j=NULL;
2588   PetscScalar    **rbuf_a=NULL;
2589   Mat_Redundant  *redund =NULL;
2590   PetscContainer container;
2591 
2592   PetscFunctionBegin;
2593   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2594   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2596 
2597   if (reuse == MAT_REUSE_MATRIX) {
2598     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2599     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2600     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2601     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2602     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2603     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2604     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2605     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2606 
2607     nsends    = redund->nsends;
2608     nrecvs    = redund->nrecvs;
2609     send_rank = redund->send_rank;
2610     recv_rank = redund->recv_rank;
2611     sbuf_nz   = redund->sbuf_nz;
2612     rbuf_nz   = redund->rbuf_nz;
2613     sbuf_j    = redund->sbuf_j;
2614     sbuf_a    = redund->sbuf_a;
2615     rbuf_j    = redund->rbuf_j;
2616     rbuf_a    = redund->rbuf_a;
2617   }
2618 
2619   if (reuse == MAT_INITIAL_MATRIX) {
2620     PetscMPIInt subrank,subsize;
2621     PetscInt    nleftover,np_subcomm;
2622     /* get the destination processors' id send_rank, nsends and nrecvs */
2623     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2624     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2625     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2626 
2627     np_subcomm = size/nsubcomm;
2628     nleftover  = size - nsubcomm*np_subcomm;
2629 
2630     nsends = 0; nrecvs = 0;
2631     for (i=0; i<size; i++) { /* i=rank*/
2632       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2633         send_rank[nsends]   = i; nsends++;
2634         recv_rank[nrecvs++] = i;
2635       }
2636     }
2637     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2638       i = size-nleftover-1;
2639       j = 0;
2640       while (j < nsubcomm - nleftover) {
2641         send_rank[nsends++] = i;
2642         i--; j++;
2643       }
2644     }
2645 
2646     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2647       for (i=0; i<nleftover; i++) {
2648         recv_rank[nrecvs++] = size-nleftover+i;
2649       }
2650     }
2651 
2652     /* allocate sbuf_j, sbuf_a */
2653     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2654     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2655     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2656   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2657 
2658   /* copy mat's local entries into the buffers */
2659   if (reuse == MAT_INITIAL_MATRIX) {
2660     rownz_max = 0;
2661     rptr      = sbuf_j;
2662     cols      = sbuf_j + rend-rstart + 1;
2663     vals      = sbuf_a;
2664     rptr[0]   = 0;
2665     for (i=0; i<rend-rstart; i++) {
2666       row    = i + rstart;
2667       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2668       ncols  = nzA + nzB;
2669       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2670       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2671       /* load the column indices for this row into cols */
2672       lwrite = 0;
2673       for (l=0; l<nzB; l++) {
2674         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2675           vals[lwrite]   = aworkB[l];
2676           cols[lwrite++] = ctmp;
2677         }
2678       }
2679       for (l=0; l<nzA; l++) {
2680         vals[lwrite]   = aworkA[l];
2681         cols[lwrite++] = cstart + cworkA[l];
2682       }
2683       for (l=0; l<nzB; l++) {
2684         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2685           vals[lwrite]   = aworkB[l];
2686           cols[lwrite++] = ctmp;
2687         }
2688       }
2689       vals     += ncols;
2690       cols     += ncols;
2691       rptr[i+1] = rptr[i] + ncols;
2692       if (rownz_max < ncols) rownz_max = ncols;
2693     }
2694     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2695   } else { /* only copy matrix values into sbuf_a */
2696     rptr    = sbuf_j;
2697     vals    = sbuf_a;
2698     rptr[0] = 0;
2699     for (i=0; i<rend-rstart; i++) {
2700       row    = i + rstart;
2701       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2702       ncols  = nzA + nzB;
2703       cworkB = b->j + b->i[i];
2704       aworkA = a->a + a->i[i];
2705       aworkB = b->a + b->i[i];
2706       lwrite = 0;
2707       for (l=0; l<nzB; l++) {
2708         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2709       }
2710       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2711       for (l=0; l<nzB; l++) {
2712         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2713       }
2714       vals     += ncols;
2715       rptr[i+1] = rptr[i] + ncols;
2716     }
2717   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2718 
2719   /* send nzlocal to others, and recv other's nzlocal */
2720   /*--------------------------------------------------*/
2721   if (reuse == MAT_INITIAL_MATRIX) {
2722     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2723 
2724     s_waits2 = s_waits3 + nsends;
2725     s_waits1 = s_waits2 + nsends;
2726     r_waits1 = s_waits1 + nsends;
2727     r_waits2 = r_waits1 + nrecvs;
2728     r_waits3 = r_waits2 + nrecvs;
2729   } else {
2730     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2731 
2732     r_waits3 = s_waits3 + nsends;
2733   }
2734 
2735   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2736   if (reuse == MAT_INITIAL_MATRIX) {
2737     /* get new tags to keep the communication clean */
2738     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2739     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2740     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2741 
2742     /* post receives of other's nzlocal */
2743     for (i=0; i<nrecvs; i++) {
2744       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2745     }
2746     /* send nzlocal to others */
2747     for (i=0; i<nsends; i++) {
2748       sbuf_nz[i] = nzlocal;
2749       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2750     }
2751     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2752     count = nrecvs;
2753     while (count) {
2754       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2755 
2756       recv_rank[imdex] = recv_status.MPI_SOURCE;
2757       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2758       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2759 
2760       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2761 
2762       rbuf_nz[imdex] += i + 2;
2763 
2764       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2765       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2766       count--;
2767     }
2768     /* wait on sends of nzlocal */
2769     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2770     /* send mat->i,j to others, and recv from other's */
2771     /*------------------------------------------------*/
2772     for (i=0; i<nsends; i++) {
2773       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2774       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2775     }
2776     /* wait on receives of mat->i,j */
2777     /*------------------------------*/
2778     count = nrecvs;
2779     while (count) {
2780       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2781       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2782       count--;
2783     }
2784     /* wait on sends of mat->i,j */
2785     /*---------------------------*/
2786     if (nsends) {
2787       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2788     }
2789   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2790 
2791   /* post receives, send and receive mat->a */
2792   /*----------------------------------------*/
2793   for (imdex=0; imdex<nrecvs; imdex++) {
2794     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2795   }
2796   for (i=0; i<nsends; i++) {
2797     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2798   }
2799   count = nrecvs;
2800   while (count) {
2801     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2802     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2803     count--;
2804   }
2805   if (nsends) {
2806     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2807   }
2808 
2809   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2810 
2811   /* create redundant matrix */
2812   /*-------------------------*/
2813   if (reuse == MAT_INITIAL_MATRIX) {
2814     /* compute rownz_max for preallocation */
2815     for (imdex=0; imdex<nrecvs; imdex++) {
2816       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2817       rptr = rbuf_j[imdex];
2818       for (i=0; i<j; i++) {
2819         ncols = rptr[i+1] - rptr[i];
2820         if (rownz_max < ncols) rownz_max = ncols;
2821       }
2822     }
2823 
2824     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2825     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2826     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2827     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2828     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2829     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2830   } else {
2831     C = *matredundant;
2832   }
2833 
2834   /* insert local matrix entries */
2835   rptr = sbuf_j;
2836   cols = sbuf_j + rend-rstart + 1;
2837   vals = sbuf_a;
2838   for (i=0; i<rend-rstart; i++) {
2839     row   = i + rstart;
2840     ncols = rptr[i+1] - rptr[i];
2841     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2842     vals += ncols;
2843     cols += ncols;
2844   }
2845   /* insert received matrix entries */
2846   for (imdex=0; imdex<nrecvs; imdex++) {
2847     rstart = rowrange[recv_rank[imdex]];
2848     rend   = rowrange[recv_rank[imdex]+1];
2849     rptr   = rbuf_j[imdex];
2850     cols   = rbuf_j[imdex] + rend-rstart + 1;
2851     vals   = rbuf_a[imdex];
2852     for (i=0; i<rend-rstart; i++) {
2853       row   = i + rstart;
2854       ncols = rptr[i+1] - rptr[i];
2855       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856       vals += ncols;
2857       cols += ncols;
2858     }
2859   }
2860   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2863   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2864   if (reuse == MAT_INITIAL_MATRIX) {
2865     PetscContainer container;
2866     *matredundant = C;
2867     /* create a supporting struct and attach it to C for reuse */
2868     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2869     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2870     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2871     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2872     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2873     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2874 
2875     redund->nzlocal   = nzlocal;
2876     redund->nsends    = nsends;
2877     redund->nrecvs    = nrecvs;
2878     redund->send_rank = send_rank;
2879     redund->recv_rank = recv_rank;
2880     redund->sbuf_nz   = sbuf_nz;
2881     redund->rbuf_nz   = rbuf_nz;
2882     redund->sbuf_j    = sbuf_j;
2883     redund->sbuf_a    = sbuf_a;
2884     redund->rbuf_j    = rbuf_j;
2885     redund->rbuf_a    = rbuf_a;
2886 
2887     redund->Destroy = C->ops->destroy;
2888     C->ops->destroy = MatDestroy_MatRedundant;
2889   }
2890   PetscFunctionReturn(0);
2891 }
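/*
   Illustrative sketch (not part of this file's implementation): the calling pattern that
   MatGetRedundantMatrix_MPIAIJ() is written to support.  A redundant copy C of A is built
   once on each subcommunicator with MAT_INITIAL_MATRIX and later refreshed in place with
   MAT_REUSE_MATRIX, so the Mat_Redundant buffers composed on C above are reused.  The
   variables nsubcomm, subcomm and mlocal_sub are assumed to have been set up by the caller
   (for example from a PetscSubcomm).

     Mat C = NULL;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... the numerical values of A change, nonzero pattern unchanged ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/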
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2898   PetscErrorCode ierr;
2899   PetscInt       i,*idxb = 0;
2900   PetscScalar    *va,*vb;
2901   Vec            vtmp;
2902 
2903   PetscFunctionBegin;
2904   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2906   if (idx) {
2907     for (i=0; i<A->rmap->n; i++) {
2908       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2909     }
2910   }
2911 
2912   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2913   if (idx) {
2914     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2915   }
2916   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2917   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2918 
2919   for (i=0; i<A->rmap->n; i++) {
2920     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2921       va[i] = vb[i];
2922       if (idx) idx[i] = a->garray[idxb[i]];
2923     }
2924   }
2925 
2926   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2928   ierr = PetscFree(idxb);CHKERRQ(ierr);
2929   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2936 {
2937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,*idxb = 0;
2940   PetscScalar    *va,*vb;
2941   Vec            vtmp;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2946   if (idx) {
2947     for (i=0; i<A->rmap->n; i++) {
2948       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2949     }
2950   }
2951 
2952   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2953   if (idx) {
2954     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2955   }
2956   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2957   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2958 
2959   for (i=0; i<A->rmap->n; i++) {
2960     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2961       va[i] = vb[i];
2962       if (idx) idx[i] = a->garray[idxb[i]];
2963     }
2964   }
2965 
2966   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2968   ierr = PetscFree(idxb);CHKERRQ(ierr);
2969   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt       *cmap  = mat->garray;
2981   PetscInt       *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar    *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3017 {
3018   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3019   PetscInt       n      = A->rmap->n;
3020   PetscInt       cstart = A->cmap->rstart;
3021   PetscInt       *cmap  = mat->garray;
3022   PetscInt       *diagIdx, *offdiagIdx;
3023   Vec            diagV, offdiagV;
3024   PetscScalar    *a, *diagA, *offdiagA;
3025   PetscInt       r;
3026   PetscErrorCode ierr;
3027 
3028   PetscFunctionBegin;
3029   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3031   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3033   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3034   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3035   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3036   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3037   for (r = 0; r < n; ++r) {
3038     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3039       a[r]   = diagA[r];
3040       idx[r] = cstart + diagIdx[r];
3041     } else {
3042       a[r]   = offdiagA[r];
3043       idx[r] = cmap[offdiagIdx[r]];
3044     }
3045   }
3046   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3048   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3049   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3050   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3051   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3058 {
3059   PetscErrorCode ierr;
3060   Mat            *dummy;
3061 
3062   PetscFunctionBegin;
3063   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3064   *newmat = *dummy;
3065   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3073 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3074 {
3075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3076   PetscErrorCode ierr;
3077 
3078   PetscFunctionBegin;
3079   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3085 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3086 {
3087   PetscErrorCode ierr;
3088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3089 
3090   PetscFunctionBegin;
3091   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3092   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3093   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /* -------------------------------------------------------------------*/
3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3100                                        MatGetRow_MPIAIJ,
3101                                        MatRestoreRow_MPIAIJ,
3102                                        MatMult_MPIAIJ,
3103                                 /* 4*/ MatMultAdd_MPIAIJ,
3104                                        MatMultTranspose_MPIAIJ,
3105                                        MatMultTransposeAdd_MPIAIJ,
3106 #if defined(PETSC_HAVE_PBGL)
3107                                        MatSolve_MPIAIJ,
3108 #else
3109                                        0,
3110 #endif
3111                                        0,
3112                                        0,
3113                                 /*10*/ 0,
3114                                        0,
3115                                        0,
3116                                        MatSOR_MPIAIJ,
3117                                        MatTranspose_MPIAIJ,
3118                                 /*15*/ MatGetInfo_MPIAIJ,
3119                                        MatEqual_MPIAIJ,
3120                                        MatGetDiagonal_MPIAIJ,
3121                                        MatDiagonalScale_MPIAIJ,
3122                                        MatNorm_MPIAIJ,
3123                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3124                                        MatAssemblyEnd_MPIAIJ,
3125                                        MatSetOption_MPIAIJ,
3126                                        MatZeroEntries_MPIAIJ,
3127                                 /*24*/ MatZeroRows_MPIAIJ,
3128                                        0,
3129 #if defined(PETSC_HAVE_PBGL)
3130                                        0,
3131 #else
3132                                        0,
3133 #endif
3134                                        0,
3135                                        0,
3136                                 /*29*/ MatSetUp_MPIAIJ,
3137 #if defined(PETSC_HAVE_PBGL)
3138                                        0,
3139 #else
3140                                        0,
3141 #endif
3142                                        0,
3143                                        0,
3144                                        0,
3145                                 /*34*/ MatDuplicate_MPIAIJ,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                 /*39*/ MatAXPY_MPIAIJ,
3151                                        MatGetSubMatrices_MPIAIJ,
3152                                        MatIncreaseOverlap_MPIAIJ,
3153                                        MatGetValues_MPIAIJ,
3154                                        MatCopy_MPIAIJ,
3155                                 /*44*/ MatGetRowMax_MPIAIJ,
3156                                        MatScale_MPIAIJ,
3157                                        0,
3158                                        0,
3159                                        MatZeroRowsColumns_MPIAIJ,
3160                                 /*49*/ MatSetRandom_MPIAIJ,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                        0,
3165                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3166                                        0,
3167                                        MatSetUnfactored_MPIAIJ,
3168                                        MatPermute_MPIAIJ,
3169                                        0,
3170                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3171                                        MatDestroy_MPIAIJ,
3172                                        MatView_MPIAIJ,
3173                                        0,
3174                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3175                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3176                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3181                                        MatGetRowMinAbs_MPIAIJ,
3182                                        0,
3183                                        MatSetColoring_MPIAIJ,
3184                                        0,
3185                                        MatSetValuesAdifor_MPIAIJ,
3186                                 /*75*/ MatFDColoringApply_AIJ,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                        MatFindZeroDiagonals_MPIAIJ,
3191                                 /*80*/ 0,
3192                                        0,
3193                                        0,
3194                                 /*83*/ MatLoad_MPIAIJ,
3195                                        0,
3196                                        0,
3197                                        0,
3198                                        0,
3199                                        0,
3200                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3201                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3202                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3203                                        MatPtAP_MPIAIJ_MPIAIJ,
3204                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3205                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                        0,
3210                                 /*99*/ 0,
3211                                        0,
3212                                        0,
3213                                        MatConjugate_MPIAIJ,
3214                                        0,
3215                                 /*104*/MatSetValuesRow_MPIAIJ,
3216                                        MatRealPart_MPIAIJ,
3217                                        MatImaginaryPart_MPIAIJ,
3218                                        0,
3219                                        0,
3220                                 /*109*/0,
3221                                        MatGetRedundantMatrix_MPIAIJ,
3222                                        MatGetRowMin_MPIAIJ,
3223                                        0,
3224                                        0,
3225                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3226                                        0,
3227                                        0,
3228                                        0,
3229                                        0,
3230                                 /*119*/0,
3231                                        0,
3232                                        0,
3233                                        0,
3234                                        MatGetMultiProcBlock_MPIAIJ,
3235                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3236                                        MatGetColumnNorms_MPIAIJ,
3237                                        MatInvertBlockDiagonal_MPIAIJ,
3238                                        0,
3239                                        MatGetSubMatricesParallel_MPIAIJ,
3240                                 /*129*/0,
3241                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3242                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3243                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3244                                        0,
3245                                 /*134*/0,
3246                                        0,
3247                                        0,
3248                                        0,
3249                                        0,
3250                                 /*139*/0,
3251                                        0
3252 };
3253 
3254 /* ----------------------------------------------------------------------------------------*/
3255 
3256 #undef __FUNCT__
3257 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3258 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3259 {
3260   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3261   PetscErrorCode ierr;
3262 
3263   PetscFunctionBegin;
3264   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3265   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3266   PetscFunctionReturn(0);
3267 }
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3271 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3272 {
3273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3278   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3279   PetscFunctionReturn(0);
3280 }
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3284 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3285 {
3286   Mat_MPIAIJ     *b;
3287   PetscErrorCode ierr;
3288   PetscInt       i;
3289   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3290 
3291   PetscFunctionBegin;
3292   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3293   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3294   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3295   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3296   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3297   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3298 
3299   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3300   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3301   if (d_nnz) {
3302     for (i=0; i<B->rmap->n; i++) {
3303       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3304     }
3305   }
3306   if (o_nnz) {
3307     for (i=0; i<B->rmap->n; i++) {
3308       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3309     }
3310   }
3311   b = (Mat_MPIAIJ*)B->data;
3312 
3313   if (!B->preallocated) {
3314     /* Explicitly create 2 MATSEQAIJ matrices. */
3315     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3316     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3317     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3318     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3319     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3320     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3321     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3322     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3323     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3324     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3325   }
3326 
3327   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3328   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3329   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3330   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3331   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3332   B->preallocated = PETSC_TRUE;
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3338 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3339 {
3340   Mat            mat;
3341   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3342   PetscErrorCode ierr;
3343 
3344   PetscFunctionBegin;
3345   *newmat = 0;
3346   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3347   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3348   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3349   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3350   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3351   a       = (Mat_MPIAIJ*)mat->data;
3352 
3353   mat->factortype   = matin->factortype;
3354   mat->rmap->bs     = matin->rmap->bs;
3355   mat->cmap->bs     = matin->cmap->bs;
3356   mat->assembled    = PETSC_TRUE;
3357   mat->insertmode   = NOT_SET_VALUES;
3358   mat->preallocated = PETSC_TRUE;
3359 
3360   a->size         = oldmat->size;
3361   a->rank         = oldmat->rank;
3362   a->donotstash   = oldmat->donotstash;
3363   a->roworiented  = oldmat->roworiented;
3364   a->rowindices   = 0;
3365   a->rowvalues    = 0;
3366   a->getrowactive = PETSC_FALSE;
3367 
3368   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3369   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3370 
3371   if (oldmat->colmap) {
3372 #if defined(PETSC_USE_CTABLE)
3373     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3374 #else
3375     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3377     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3378 #endif
3379   } else a->colmap = 0;
3380   if (oldmat->garray) {
3381     PetscInt len;
3382     len  = oldmat->B->cmap->n;
3383     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3384     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3385     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3386   } else a->garray = 0;
3387 
3388   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3389   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3390   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3391   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3392   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3393   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3394   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3395   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3396   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3397   *newmat = mat;
3398   PetscFunctionReturn(0);
3399 }
3400 
3401 
3402 
3403 #undef __FUNCT__
3404 #define __FUNCT__ "MatLoad_MPIAIJ"
3405 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3406 {
3407   PetscScalar    *vals,*svals;
3408   MPI_Comm       comm;
3409   PetscErrorCode ierr;
3410   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3411   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3412   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3413   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3414   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3415   int            fd;
3416   PetscInt       bs = 1;
3417 
3418   PetscFunctionBegin;
3419   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3420   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3421   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3422   if (!rank) {
3423     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3424     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3425     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3426   }
3427 
3428   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3429   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3430   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3431 
3432   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3433 
3434   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3435   M    = header[1]; N = header[2];
3436   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3437   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3438   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3439 
3440   /* If global sizes are set, check if they are consistent with that given in the file */
3441   if (sizesset) {
3442     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3443   }
3444   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3445   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3446 
3447   /* determine ownership of all (block) rows */
3448   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3449   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3450   else m = newMat->rmap->n; /* Set by user */
3451 
3452   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3453   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3454 
3455   /* First process needs enough room for process with most rows */
3456   if (!rank) {
3457     mmax = rowners[1];
3458     for (i=2; i<=size; i++) {
3459       mmax = PetscMax(mmax, rowners[i]);
3460     }
3461   } else mmax = -1;             /* unused, but compilers complain */
3462 
3463   rowners[0] = 0;
3464   for (i=2; i<=size; i++) {
3465     rowners[i] += rowners[i-1];
3466   }
3467   rstart = rowners[rank];
3468   rend   = rowners[rank+1];
3469 
3470   /* distribute row lengths to all processors */
3471   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3472   if (!rank) {
3473     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3474     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3475     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3476     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3477     for (j=0; j<m; j++) {
3478       procsnz[0] += ourlens[j];
3479     }
3480     for (i=1; i<size; i++) {
3481       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3482       /* calculate the number of nonzeros on each processor */
3483       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3484         procsnz[i] += rowlengths[j];
3485       }
3486       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3487     }
3488     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3489   } else {
3490     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3491   }
3492 
3493   if (!rank) {
3494     /* determine max buffer needed and allocate it */
3495     maxnz = 0;
3496     for (i=0; i<size; i++) {
3497       maxnz = PetscMax(maxnz,procsnz[i]);
3498     }
3499     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3500 
3501     /* read in my part of the matrix column indices  */
3502     nz   = procsnz[0];
3503     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3504     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3505 
3506     /* read in everyone else's and ship off */
3507     for (i=1; i<size; i++) {
3508       nz   = procsnz[i];
3509       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3510       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3511     }
3512     ierr = PetscFree(cols);CHKERRQ(ierr);
3513   } else {
3514     /* determine buffer space needed for message */
3515     nz = 0;
3516     for (i=0; i<m; i++) {
3517       nz += ourlens[i];
3518     }
3519     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3520 
3521     /* receive message of column indices*/
3522     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3523   }
3524 
3525   /* determine column ownership if matrix is not square */
3526   if (N != M) {
3527     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3528     else n = newMat->cmap->n;
3529     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3530     cstart = cend - n;
3531   } else {
3532     cstart = rstart;
3533     cend   = rend;
3534     n      = cend - cstart;
3535   }
3536 
3537   /* loop over local rows, determining number of off diagonal entries */
3538   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3539   jj   = 0;
3540   for (i=0; i<m; i++) {
3541     for (j=0; j<ourlens[i]; j++) {
3542       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3543       jj++;
3544     }
3545   }
3546 
3547   for (i=0; i<m; i++) {
3548     ourlens[i] -= offlens[i];
3549   }
3550   if (!sizesset) {
3551     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3552   }
3553 
3554   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3555 
3556   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3557 
3558   for (i=0; i<m; i++) {
3559     ourlens[i] += offlens[i];
3560   }
3561 
3562   if (!rank) {
3563     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3564 
3565     /* read in my part of the matrix numerical values  */
3566     nz   = procsnz[0];
3567     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3568 
3569     /* insert into matrix */
3570     jj      = rstart;
3571     smycols = mycols;
3572     svals   = vals;
3573     for (i=0; i<m; i++) {
3574       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3575       smycols += ourlens[i];
3576       svals   += ourlens[i];
3577       jj++;
3578     }
3579 
3580     /* read in other processors and ship out */
3581     for (i=1; i<size; i++) {
3582       nz   = procsnz[i];
3583       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3584       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3585     }
3586     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3587   } else {
3588     /* receive numeric values */
3589     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3590 
3591     /* receive message of values*/
3592     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3593 
3594     /* insert into matrix */
3595     jj      = rstart;
3596     smycols = mycols;
3597     svals   = vals;
3598     for (i=0; i<m; i++) {
3599       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3600       smycols += ourlens[i];
3601       svals   += ourlens[i];
3602       jj++;
3603     }
3604   }
3605   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3606   ierr = PetscFree(vals);CHKERRQ(ierr);
3607   ierr = PetscFree(mycols);CHKERRQ(ierr);
3608   ierr = PetscFree(rowners);CHKERRQ(ierr);
3609   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #undef __FUNCT__
3615 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3616 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3617 {
3618   PetscErrorCode ierr;
3619   IS             iscol_local;
3620   PetscInt       csize;
3621 
3622   PetscFunctionBegin;
3623   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3624   if (call == MAT_REUSE_MATRIX) {
3625     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3626     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3627   } else {
3628     PetscInt cbs;
3629     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3630     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3631     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3632   }
3633   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3634   if (call == MAT_INITIAL_MATRIX) {
3635     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3636     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3637   }
3638   PetscFunctionReturn(0);
3639 }
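/*
   Illustrative sketch (not part of this file's implementation): the reuse pattern
   MatGetSubMatrix_MPIAIJ() supports through the composed "ISAllGather" index set.  The
   submatrix is created once with MAT_INITIAL_MATRIX and its values are refreshed later
   with MAT_REUSE_MATRIX; isrow and iscol are assumed to have been created by the caller
   (for example with ISCreateStride()).

     Mat sub = NULL;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... the numerical values of A change, same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/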
3640 
3641 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3644 /*
3645     Not great since it makes two copies of the submatrix: first a SeqAIJ
3646   submatrix locally, and then the end result by concatenating the local matrices.
3647   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3648 
3649   Note: This requires a sequential iscol with all indices.
3650 */
3651 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3652 {
3653   PetscErrorCode ierr;
3654   PetscMPIInt    rank,size;
3655   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3656   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3657   PetscBool      allcolumns, colflag;
3658   Mat            M,Mreuse;
3659   MatScalar      *vwork,*aa;
3660   MPI_Comm       comm;
3661   Mat_SeqAIJ     *aij;
3662 
3663   PetscFunctionBegin;
3664   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3665   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3666   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3667 
3668   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3669   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3670   if (colflag && ncol == mat->cmap->N) {
3671     allcolumns = PETSC_TRUE;
3672   } else {
3673     allcolumns = PETSC_FALSE;
3674   }
3675   if (call ==  MAT_REUSE_MATRIX) {
3676     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3677     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3678     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3679   } else {
3680     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3681   }
3682 
3683   /*
3684       m - number of local rows
3685       n - number of columns (same on all processors)
3686       rstart - first row in new global matrix generated
3687   */
3688   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3689   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3690   if (call == MAT_INITIAL_MATRIX) {
3691     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3692     ii  = aij->i;
3693     jj  = aij->j;
3694 
3695     /*
3696         Determine the number of non-zeros in the diagonal and off-diagonal
3697         portions of the matrix in order to do correct preallocation
3698     */
3699 
3700     /* first get start and end of "diagonal" columns */
3701     if (csize == PETSC_DECIDE) {
3702       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3703       if (mglobal == n) { /* square matrix */
3704         nlocal = m;
3705       } else {
3706         nlocal = n/size + ((n % size) > rank);
3707       }
3708     } else {
3709       nlocal = csize;
3710     }
3711     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3712     rstart = rend - nlocal;
3713     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3714 
3715     /* next, compute all the lengths */
3716     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3717     olens = dlens + m;
3718     for (i=0; i<m; i++) {
3719       jend = ii[i+1] - ii[i];
3720       olen = 0;
3721       dlen = 0;
3722       for (j=0; j<jend; j++) {
3723         if (*jj < rstart || *jj >= rend) olen++;
3724         else dlen++;
3725         jj++;
3726       }
3727       olens[i] = olen;
3728       dlens[i] = dlen;
3729     }
3730     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3731     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3732     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3733     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3734     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3735     ierr = PetscFree(dlens);CHKERRQ(ierr);
3736   } else {
3737     PetscInt ml,nl;
3738 
3739     M    = *newmat;
3740     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3741     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3742     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3743     /*
3744          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3745        rather than the slower MatSetValues().
3746     */
3747     M->was_assembled = PETSC_TRUE;
3748     M->assembled     = PETSC_FALSE;
3749   }
3750   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3751   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3752   ii   = aij->i;
3753   jj   = aij->j;
3754   aa   = aij->a;
3755   for (i=0; i<m; i++) {
3756     row   = rstart + i;
3757     nz    = ii[i+1] - ii[i];
3758     cwork = jj;     jj += nz;
3759     vwork = aa;     aa += nz;
3760     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3761   }
3762 
3763   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   *newmat = M;
3766 
3767   /* save submatrix used in processor for next request */
3768   if (call ==  MAT_INITIAL_MATRIX) {
3769     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3770     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3771   }
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3777 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3778 {
3779   PetscInt       m,cstart, cend,j,nnz,i,d;
3780   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3781   const PetscInt *JJ;
3782   PetscScalar    *values;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3787 
3788   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3789   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3790   m      = B->rmap->n;
3791   cstart = B->cmap->rstart;
3792   cend   = B->cmap->rend;
3793   rstart = B->rmap->rstart;
3794 
3795   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3796 
3797 #if defined(PETSC_USE_DEBUG)
3798   for (i=0; i<m; i++) {
3799     nnz = Ii[i+1]- Ii[i];
3800     JJ  = J + Ii[i];
3801     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3802     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3803     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3804   }
3805 #endif
3806 
3807   for (i=0; i<m; i++) {
3808     nnz     = Ii[i+1]- Ii[i];
3809     JJ      = J + Ii[i];
3810     nnz_max = PetscMax(nnz_max,nnz);
3811     d       = 0;
3812     for (j=0; j<nnz; j++) {
3813       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3814     }
3815     d_nnz[i] = d;
3816     o_nnz[i] = nnz - d;
3817   }
3818   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3819   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3820 
3821   if (v) values = (PetscScalar*)v;
3822   else {
3823     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3824     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3825   }
3826 
3827   for (i=0; i<m; i++) {
3828     ii   = i + rstart;
3829     nnz  = Ii[i+1]- Ii[i];
3830     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3831   }
3832   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834 
3835   if (!v) {
3836     ierr = PetscFree(values);CHKERRQ(ierr);
3837   }
3838   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3844 /*@
3845    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3846    (the default parallel PETSc format).
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  B - the matrix
3852 .  i - the indices into j for the start of each local row (starts with zero)
3853 .  j - the column indices for each local row (starts with zero)
3854 -  v - optional values in the matrix
3855 
3856    Level: developer
3857 
3858    Notes:
3859        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3860      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3861      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3862 
3863        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3864 
3865        The format used for the sparse matrix input is equivalent to a
3866     row-major ordering, i.e. for the following matrix, the input data expected is
3867     as shown:
3868 
3869         1 0 0
3870         2 0 3     P0
3871        -------
3872         4 5 6     P1
3873 
3874      Process0 [P0]: rows_owned=[0,1]
3875         i =  {0,1,3}  [size = nrow+1  = 2+1]
3876         j =  {0,0,2}  [size = nz = 3]
3877         v =  {1,2,3}  [size = nz = 3]
3878 
3879      Process1 [P1]: rows_owned=[2]
3880         i =  {0,3}    [size = nrow+1  = 1+1]
3881         j =  {0,1,2}  [size = nz = 3]
3882         v =  {4,5,6}  [size = nz = 3]
3883 
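     A sketch of the corresponding call on process 0 of a two-process run (the
     variable names and the use of PETSC_COMM_WORLD are illustrative; error
     checking omitted):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);   /* 2 of the 3 global rows live on process 0 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
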
3884 .keywords: matrix, aij, compressed row, sparse, parallel
3885 
3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3887           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3888 @*/
3889 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3890 {
3891   PetscErrorCode ierr;
3892 
3893   PetscFunctionBegin;
3894   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3900 /*@C
3901    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3902    (the default parallel PETSc format).  For good matrix assembly performance
3903    the user should preallocate the matrix storage by setting the parameters
3904    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3905    performance can be increased by more than a factor of 50.
3906 
3907    Collective on MPI_Comm
3908 
3909    Input Parameters:
3910 +  A - the matrix
3911 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3912            (same value is used for all local rows)
3913 .  d_nnz - array containing the number of nonzeros in the various rows of the
3914            DIAGONAL portion of the local submatrix (possibly different for each row)
3915            or NULL, if d_nz is used to specify the nonzero structure.
3916            The size of this array is equal to the number of local rows, i.e 'm'.
3917            For matrices that will be factored, you must leave room for (and set)
3918            the diagonal entry even if it is zero.
3919 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3920            submatrix (same value is used for all local rows).
3921 -  o_nnz - array containing the number of nonzeros in the various rows of the
3922            OFF-DIAGONAL portion of the local submatrix (possibly different for
3923            each row) or NULL, if o_nz is used to specify the nonzero
3924            structure. The size of this array is equal to the number
3925            of local rows, i.e 'm'.
3926 
3927    If the *_nnz parameter is given then the *_nz parameter is ignored
3928 
3929    The AIJ format (also called the Yale sparse matrix format or
3930    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3931    storage.  The stored row and column indices begin with zero.
3932    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3933 
3934    The parallel matrix is partitioned such that the first m0 rows belong to
3935    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3936    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3937 
3938    The DIAGONAL portion of the local submatrix of a processor can be defined
3939    as the submatrix which is obtained by extracting the part corresponding to
3940    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3941    first row that belongs to the processor, r2 is the last row belonging to
3942    this processor, and c1-c2 is the range of indices of the local part of a
3943    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3944    common case of a square matrix, the row and column ranges are the same and
3945    the DIAGONAL part is also square. The remaining portion of the local
3946    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3947 
3948    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3949 
3950    You can call MatGetInfo() to get information on how effective the preallocation was;
3951    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3952    You can also run with the option -info and look for messages with the string
3953    malloc in them to see if additional memory allocation was needed.
3954 
3955    Example usage:
3956 
3957    Consider the following 8x8 matrix with 34 non-zero values, that is
3958    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3959    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3960    as follows:
3961 
3962 .vb
3963             1  2  0  |  0  3  0  |  0  4
3964     Proc0   0  5  6  |  7  0  0  |  8  0
3965             9  0 10  | 11  0  0  | 12  0
3966     -------------------------------------
3967            13  0 14  | 15 16 17  |  0  0
3968     Proc1   0 18  0  | 19 20 21  |  0  0
3969             0  0  0  | 22 23  0  | 24  0
3970     -------------------------------------
3971     Proc2  25 26 27  |  0  0 28  | 29  0
3972            30  0  0  | 31 32 33  |  0 34
3973 .ve
3974 
3975    This can be represented as a collection of submatrices as:
3976 
3977 .vb
3978       A B C
3979       D E F
3980       G H I
3981 .ve
3982 
3983    Where the submatrices A,B,C are owned by proc0, D,E,F are
3984    owned by proc1, G,H,I are owned by proc2.
3985 
3986    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3987    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3988    The 'M','N' parameters are 8,8, and have the same values on all procs.
3989 
3990    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3991    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3992    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3993    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3994    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3995    matrix, and [DF] as another SeqAIJ matrix.
3996 
3997    When d_nz, o_nz parameters are specified, d_nz storage elements are
3998    allocated for every row of the local diagonal submatrix, and o_nz
3999    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4000    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4001    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4002    In this case, the values of d_nz,o_nz are:
4003 .vb
4004      proc0 : d_nz = 2, o_nz = 2
4005      proc1 : d_nz = 3, o_nz = 2
4006      proc2 : d_nz = 1, o_nz = 4
4007 .ve
4008    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4009    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4010    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4011    34 values.
4012 
4013    When d_nnz, o_nnz parameters are specified, the storage is specified
4014    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4015    In the above case the values for d_nnz,o_nnz are:
4016 .vb
4017      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4018      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4019      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4020 .ve
4021    Here the space allocated is the sum of all the above values, i.e. 34, and
4022    hence the preallocation is perfect.
4023 
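   For instance, with the d_nnz/o_nnz values above, the call made by proc1 is
   simply (a sketch; error checking omitted):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
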
4024    Level: intermediate
4025 
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4030 @*/
4031 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4032 {
4033   PetscErrorCode ierr;
4034 
4035   PetscFunctionBegin;
4036   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4037   PetscValidType(B,1);
4038   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4039   PetscFunctionReturn(0);
4040 }
4041 
4042 #undef __FUNCT__
4043 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4044 /*@
4045      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4046          rows in standard CSR format.
4047 
4048    Collective on MPI_Comm
4049 
4050    Input Parameters:
4051 +  comm - MPI communicator
4052 .  m - number of local rows (Cannot be PETSC_DECIDE)
4053 .  n - This value should be the same as the local size used in creating the
4054        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4055        calculated if N is given). For square matrices n is almost always m.
4056 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4057 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4058 .   i - row indices
4059 .   j - column indices
4060 -   a - matrix values
4061 
4062    Output Parameter:
4063 .   mat - the matrix
4064 
4065    Level: intermediate
4066 
4067    Notes:
4068        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4069      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4070      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4071 
4072        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4073 
4074        The format used for the sparse matrix input is equivalent to a
4075     row-major ordering, i.e. for the following matrix, the input data expected is
4076     as shown:
4077 
4078         1 0 0
4079         2 0 3     P0
4080        -------
4081         4 5 6     P1
4082 
4083      Process0 [P0]: rows_owned=[0,1]
4084         i =  {0,1,3}  [size = nrow+1  = 2+1]
4085         j =  {0,0,2}  [size = nz = 3]
4086         v =  {1,2,3}  [size = nz = 3]
4087 
4088      Process1 [P1]: rows_owned=[2]
4089         i =  {0,3}    [size = nrow+1  = 1+1]
4090         j =  {0,1,2}  [size = nz = 3]
4091         v =  {4,5,6}  [size = nz = 3]
4092 
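     A sketch of the corresponding call on process 0 of a two-process run (the
     variable names and the use of PETSC_COMM_WORLD are illustrative; error
     checking omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
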
4093 .keywords: matrix, aij, compressed row, sparse, parallel
4094 
4095 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4096           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4097 @*/
4098 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4099 {
4100   PetscErrorCode ierr;
4101 
4102   PetscFunctionBegin;
4103   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4104   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4105   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4106   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4107   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4108   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4109   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 #undef __FUNCT__
4114 #define __FUNCT__ "MatCreateAIJ"
4115 /*@C
4116    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4117    (the default parallel PETSc format).  For good matrix assembly performance
4118    the user should preallocate the matrix storage by setting the parameters
4119    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4120    performance can be increased by more than a factor of 50.
4121 
4122    Collective on MPI_Comm
4123 
4124    Input Parameters:
4125 +  comm - MPI communicator
4126 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4127            This value should be the same as the local size used in creating the
4128            y vector for the matrix-vector product y = Ax.
4129 .  n - This value should be the same as the local size used in creating the
4130        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4131        calculated if N is given). For square matrices n is almost always m.
4132 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4133 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4134 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4135            (same value is used for all local rows)
4136 .  d_nnz - array containing the number of nonzeros in the various rows of the
4137            DIAGONAL portion of the local submatrix (possibly different for each row)
4138            or NULL, if d_nz is used to specify the nonzero structure.
4139            The size of this array is equal to the number of local rows, i.e 'm'.
4140 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4141            submatrix (same value is used for all local rows).
4142 -  o_nnz - array containing the number of nonzeros in the various rows of the
4143            OFF-DIAGONAL portion of the local submatrix (possibly different for
4144            each row) or NULL, if o_nz is used to specify the nonzero
4145            structure. The size of this array is equal to the number
4146            of local rows, i.e 'm'.
4147 
4148    Output Parameter:
4149 .  A - the matrix
4150 
4151    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4152    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4153    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4154 
4155    Notes:
4156    If the *_nnz parameter is given then the *_nz parameter is ignored
4157 
4158    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4159    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4160    storage requirements for this matrix.
4161 
4162    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4163    processor then it must be used on all processors that share the object for
4164    that argument.
4165 
4166    The user MUST specify either the local or global matrix dimensions
4167    (possibly both).
4168 
4169    The parallel matrix is partitioned across processors such that the
4170    first m0 rows belong to process 0, the next m1 rows belong to
4171    process 1, the next m2 rows belong to process 2, etc., where
4172    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4173    values corresponding to an [m x N] submatrix.
4174 
4175    The columns are logically partitioned with the n0 columns belonging
4176    to the 0th partition, the next n1 columns belonging to the next
4177    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4178 
4179    The DIAGONAL portion of the local submatrix on any given processor
4180    is the submatrix corresponding to the rows and columns m,n
4181    owned by the given processor, i.e. the diagonal matrix on
4182    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4183    etc. The remaining portion of the local submatrix [m x (N-n)]
4184    constitutes the OFF-DIAGONAL portion. The example below better
4185    illustrates this concept.
4186 
4187    For a square global matrix we define each processor's diagonal portion
4188    to be its local rows and the corresponding columns (a square submatrix);
4189    each processor's off-diagonal portion encompasses the remainder of the
4190    local matrix (a rectangular submatrix).
4191 
4192    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4193 
4194    When calling this routine with a single process communicator, a matrix of
4195    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4196    type of communicator, use the construction mechanism:
4197      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4198 
4199    By default, this format uses inodes (identical nodes) when possible.
4200    We search for consecutive rows with the same nonzero structure, thereby
4201    reusing matrix information to achieve increased efficiency.
4202 
4203    Options Database Keys:
4204 +  -mat_no_inode  - Do not use inodes
4205 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4206 -  -mat_aij_oneindex - Internally use indexing starting at 1
4207         rather than 0.  Note that when calling MatSetValues(),
4208         the user still MUST index entries starting at 0!
4209 
4210 
4211    Example usage:
4212 
4213    Consider the following 8x8 matrix with 34 non-zero values, that is
4214    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4215    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4216    as follows:
4217 
4218 .vb
4219             1  2  0  |  0  3  0  |  0  4
4220     Proc0   0  5  6  |  7  0  0  |  8  0
4221             9  0 10  | 11  0  0  | 12  0
4222     -------------------------------------
4223            13  0 14  | 15 16 17  |  0  0
4224     Proc1   0 18  0  | 19 20 21  |  0  0
4225             0  0  0  | 22 23  0  | 24  0
4226     -------------------------------------
4227     Proc2  25 26 27  |  0  0 28  | 29  0
4228            30  0  0  | 31 32 33  |  0 34
4229 .ve
4230 
4231    This can be represented as a collection of submatrices as:
4232 
4233 .vb
4234       A B C
4235       D E F
4236       G H I
4237 .ve
4238 
4239    Where the submatrices A,B,C are owned by proc0, D,E,F are
4240    owned by proc1, G,H,I are owned by proc2.
4241 
4242    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4243    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4244    The 'M','N' parameters are 8,8, and have the same values on all procs.
4245 
4246    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4247    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4248    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4249    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4250    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4251    matrix, and [DF] as another SeqAIJ matrix.
4252 
4253    When d_nz, o_nz parameters are specified, d_nz storage elements are
4254    allocated for every row of the local diagonal submatrix, and o_nz
4255    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4256    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4257    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4258    In this case, the values of d_nz,o_nz are:
4259 .vb
4260      proc0 : d_nz = 2, o_nz = 2
4261      proc1 : d_nz = 3, o_nz = 2
4262      proc2 : d_nz = 1, o_nz = 4
4263 .ve
4264    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4265    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4266    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4267    34 values.
4268 
4269    When d_nnz, o_nnz parameters are specified, the storage is specified
4270    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4271    In the above case the values for d_nnz,o_nnz are:
4272 .vb
4273      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4274      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4275      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4276 .ve
4277    Here the space allocated is the sum of all the above values, i.e. 34, and
4278    hence the preallocation is perfect.
4279 
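   For instance, using the d_nnz/o_nnz values above, proc1 would create its part
   of the matrix with (a sketch; error checking omitted, PETSC_COMM_WORLD assumed):

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
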
4280    Level: intermediate
4281 
4282 .keywords: matrix, aij, compressed row, sparse, parallel
4283 
4284 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4285           MPIAIJ, MatCreateMPIAIJWithArrays()
4286 @*/
4287 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4288 {
4289   PetscErrorCode ierr;
4290   PetscMPIInt    size;
4291 
4292   PetscFunctionBegin;
4293   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4294   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4295   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4296   if (size > 1) {
4297     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4298     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4299   } else {
4300     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4301     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4302   }
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #undef __FUNCT__
4307 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
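/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad, the off-diagonal block Ao, and the
   mapping from local columns of Ao to global columns (the garray) of an MPIAIJ matrix.
   The returned objects are the matrix's internal data, not copies.
*/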
4308 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4309 {
4310   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4311 
4312   PetscFunctionBegin;
4313   *Ad     = a->A;
4314   *Ao     = a->B;
4315   *colmap = a->garray;
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 #undef __FUNCT__
4320 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4321 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4322 {
4323   PetscErrorCode ierr;
4324   PetscInt       i;
4325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4326 
4327   PetscFunctionBegin;
4328   if (coloring->ctype == IS_COLORING_GLOBAL) {
4329     ISColoringValue *allcolors,*colors;
4330     ISColoring      ocoloring;
4331 
4332     /* set coloring for diagonal portion */
4333     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4334 
4335     /* set coloring for off-diagonal portion */
4336     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4337     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4338     for (i=0; i<a->B->cmap->n; i++) {
4339       colors[i] = allcolors[a->garray[i]];
4340     }
4341     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4342     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4343     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4344     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4345   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4346     ISColoringValue *colors;
4347     PetscInt        *larray;
4348     ISColoring      ocoloring;
4349 
4350     /* set coloring for diagonal portion */
4351     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4352     for (i=0; i<a->A->cmap->n; i++) {
4353       larray[i] = i + A->cmap->rstart;
4354     }
4355     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4356     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4357     for (i=0; i<a->A->cmap->n; i++) {
4358       colors[i] = coloring->colors[larray[i]];
4359     }
4360     ierr = PetscFree(larray);CHKERRQ(ierr);
4361     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4362     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4363     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4364 
4365     /* set coloring for off-diagonal portion */
4366     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4367     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4368     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4369     for (i=0; i<a->B->cmap->n; i++) {
4370       colors[i] = coloring->colors[larray[i]];
4371     }
4372     ierr = PetscFree(larray);CHKERRQ(ierr);
4373     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4374     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4375     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4376   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4382 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4383 {
4384   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4385   PetscErrorCode ierr;
4386 
4387   PetscFunctionBegin;
4388   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4389   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4390   PetscFunctionReturn(0);
4391 }
4392 
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4395 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4396 {
4397   PetscErrorCode ierr;
4398   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4399   PetscInt       *indx;
4400 
4401   PetscFunctionBegin;
4402   /* This routine will ONLY return MPIAIJ type matrix */
4403   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4404   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4405   if (n == PETSC_DECIDE) {
4406     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4407   }
4408   /* Check sum(n) = N */
4409   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4410   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4411 
4412   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4413   rstart -= m;
4414 
4415   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4416   for (i=0; i<m; i++) {
4417     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4418     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4419     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4420   }
4421 
4422   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4423   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4424   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4425   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4426   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4427   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4433 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4434 {
4435   PetscErrorCode ierr;
4436   PetscInt       m,N,i,rstart,nnz,Ii;
4437   PetscInt       *indx;
4438   PetscScalar    *values;
4439 
4440   PetscFunctionBegin;
4441   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4442   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4443   for (i=0; i<m; i++) {
4444     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4445     Ii   = i + rstart;
4446     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4447     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4448   }
4449   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4450   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4456 /*@
4457       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4458                  matrices from each processor
4459 
4460     Collective on MPI_Comm
4461 
4462    Input Parameters:
4463 +    comm - the communicator the parallel matrix will live on
4464 .    inmat - the input sequential matrices
4465 .    n - number of local columns (or PETSC_DECIDE)
4466 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4467 
4468    Output Parameter:
4469 .    outmat - the parallel matrix generated
4470 
4471     Level: advanced
4472 
4473    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4474 
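   A typical call, where A_seq is a SeqAIJ matrix assumed to exist on every
   process (a sketch; error checking omitted):

.vb
     Mat A_mpi;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve
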
4475 @*/
4476 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4477 {
4478   PetscErrorCode ierr;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4482   if (scall == MAT_INITIAL_MATRIX) {
4483     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4484   }
4485   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4486   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatFileSplit"
4492 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4493 {
4494   PetscErrorCode    ierr;
4495   PetscMPIInt       rank;
4496   PetscInt          m,N,i,rstart,nnz;
4497   size_t            len;
4498   const PetscInt    *indx;
4499   PetscViewer       out;
4500   char              *name;
4501   Mat               B;
4502   const PetscScalar *values;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4506   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4507   /* Should this be the type of the diagonal block of A? */
4508   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4509   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4510   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4511   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4512   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4523   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4524   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4525   sprintf(name,"%s.%d",outfile,rank);
4526   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4527   ierr = PetscFree(name);CHKERRQ(ierr);
4528   ierr = MatView(B,out);CHKERRQ(ierr);
4529   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4530   ierr = MatDestroy(&B);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4535 #undef __FUNCT__
4536 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4537 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4538 {
4539   PetscErrorCode      ierr;
4540   Mat_Merge_SeqsToMPI *merge;
4541   PetscContainer      container;
4542 
4543   PetscFunctionBegin;
4544   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4545   if (container) {
4546     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4559     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4560     ierr = PetscFree(merge);CHKERRQ(ierr);
4561     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4562   }
4563   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4564   PetscFunctionReturn(0);
4565 }
4566 
4567 #include <../src/mat/utils/freespace.h>
4568 #include <petscbt.h>
4569 
4570 #undef __FUNCT__
4571 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4572 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4573 {
4574   PetscErrorCode      ierr;
4575   MPI_Comm            comm;
4576   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4577   PetscMPIInt         size,rank,taga,*len_s;
4578   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4579   PetscInt            proc,m;
4580   PetscInt            **buf_ri,**buf_rj;
4581   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4582   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4583   MPI_Request         *s_waits,*r_waits;
4584   MPI_Status          *status;
4585   MatScalar           *aa=a->a;
4586   MatScalar           **abuf_r,*ba_i;
4587   Mat_Merge_SeqsToMPI *merge;
4588   PetscContainer      container;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4592   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4593 
4594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4595   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4596 
4597   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4598   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4599 
4600   bi     = merge->bi;
4601   bj     = merge->bj;
4602   buf_ri = merge->buf_ri;
4603   buf_rj = merge->buf_rj;
4604 
4605   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4606   owners = merge->rowmap->range;
4607   len_s  = merge->len_s;
4608 
4609   /* send and recv matrix values */
4610   /*-----------------------------*/
4611   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4612   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4613 
4614   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4615   for (proc=0,k=0; proc<size; proc++) {
4616     if (!len_s[proc]) continue;
4617     i    = owners[proc];
4618     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4619     k++;
4620   }
4621 
4622   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4623   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4624   ierr = PetscFree(status);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4627   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4628 
4629   /* insert mat values of mpimat */
4630   /*----------------------------*/
4631   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4632   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4633 
4634   for (k=0; k<merge->nrecv; k++) {
4635     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4636     nrows       = *(buf_ri_k[k]);
4637     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4638     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4639   }
4640 
4641   /* set values of ba */
4642   m = merge->rowmap->n;
4643   for (i=0; i<m; i++) {
4644     arow = owners[rank] + i;
4645     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4646     bnzi = bi[i+1] - bi[i];
4647     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4648 
4649     /* add local non-zero vals of this proc's seqmat into ba */
4650     anzi   = ai[arow+1] - ai[arow];
4651     aj     = a->j + ai[arow];
4652     aa     = a->a + ai[arow];
4653     nextaj = 0;
4654     for (j=0; nextaj<anzi; j++) {
4655       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4656         ba_i[j] += aa[nextaj++];
4657       }
4658     }
4659 
4660     /* add received vals into ba */
4661     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4662       /* i-th row */
4663       if (i == *nextrow[k]) {
4664         anzi   = *(nextai[k]+1) - *nextai[k];
4665         aj     = buf_rj[k] + *(nextai[k]);
4666         aa     = abuf_r[k] + *(nextai[k]);
4667         nextaj = 0;
4668         for (j=0; nextaj<anzi; j++) {
4669           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4670             ba_i[j] += aa[nextaj++];
4671           }
4672         }
4673         nextrow[k]++; nextai[k]++;
4674       }
4675     }
4676     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4677   }
4678   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4680 
4681   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4682   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4683   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4684   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4685   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4686   PetscFunctionReturn(0);
4687 }
4688 
4689 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4690 
4691 #undef __FUNCT__
4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4693 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4694 {
4695   PetscErrorCode      ierr;
4696   Mat                 B_mpi;
4697   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4698   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4699   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4700   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4701   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4702   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4703   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4704   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4705   MPI_Status          *status;
4706   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4707   PetscBT             lnkbt;
4708   Mat_Merge_SeqsToMPI *merge;
4709   PetscContainer      container;
4710 
4711   PetscFunctionBegin;
4712   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4713 
4714   /* make sure it is a PETSc comm */
4715   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4718 
4719   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4720   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4721 
4722   /* determine row ownership */
4723   /*---------------------------------------------------------*/
4724   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4725   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4726   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4727   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4728   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4729   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4730   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4731 
4732   m      = merge->rowmap->n;
4733   owners = merge->rowmap->range;
4734 
4735   /* determine the number of messages to send, their lengths */
4736   /*---------------------------------------------------------*/
4737   len_s = merge->len_s;
4738 
4739   len          = 0; /* length of buf_si[] */
4740   merge->nsend = 0;
4741   for (proc=0; proc<size; proc++) {
4742     len_si[proc] = 0;
4743     if (proc == rank) {
4744       len_s[proc] = 0;
4745     } else {
4746       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4747       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4748     }
4749     if (len_s[proc]) {
4750       merge->nsend++;
4751       nrows = 0;
4752       for (i=owners[proc]; i<owners[proc+1]; i++) {
4753         if (ai[i+1] > ai[i]) nrows++;
4754       }
4755       len_si[proc] = 2*(nrows+1);
4756       len         += len_si[proc];
4757     }
4758   }
4759 
4760   /* determine the number and length of messages to receive for ij-structure */
4761   /*-------------------------------------------------------------------------*/
4762   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4763   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4764 
4765   /* post the Irecv of j-structure */
4766   /*-------------------------------*/
4767   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4768   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4769 
4770   /* post the Isend of j-structure */
4771   /*--------------------------------*/
4772   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4773 
4774   for (proc=0, k=0; proc<size; proc++) {
4775     if (!len_s[proc]) continue;
4776     i    = owners[proc];
4777     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4778     k++;
4779   }
4780 
4781   /* receives and sends of j-structure are complete */
4782   /*------------------------------------------------*/
4783   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4784   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4785 
4786   /* send and recv i-structure */
4787   /*---------------------------*/
4788   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4789   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4790 
4791   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4792   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4793   for (proc=0,k=0; proc<size; proc++) {
4794     if (!len_s[proc]) continue;
4795     /* form outgoing message for i-structure:
4796          buf_si[0]:                 nrows to be sent
4797                [1:nrows]:           row index (global)
4798                [nrows+1:2*nrows+1]: i-structure index
4799     */
4800     /*-------------------------------------------*/
4801     nrows       = len_si[proc]/2 - 1;
4802     buf_si_i    = buf_si + nrows+1;
4803     buf_si[0]   = nrows;
4804     buf_si_i[0] = 0;
4805     nrows       = 0;
4806     for (i=owners[proc]; i<owners[proc+1]; i++) {
4807       anzi = ai[i+1] - ai[i];
4808       if (anzi) {
4809         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4810         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4811         nrows++;
4812       }
4813     }
4814     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4815     k++;
4816     buf_si += len_si[proc];
4817   }
4818 
4819   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4820   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4821 
4822   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4823   for (i=0; i<merge->nrecv; i++) {
4824     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4825   }
4826 
4827   ierr = PetscFree(len_si);CHKERRQ(ierr);
4828   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4829   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4830   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4831   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4832   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4833   ierr = PetscFree(status);CHKERRQ(ierr);
4834 
4835   /* compute a local seq matrix in each processor */
4836   /*----------------------------------------------*/
4837   /* allocate bi array and free space for accumulating nonzero column info */
4838   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4839   bi[0] = 0;
4840 
4841   /* create and initialize a linked list */
4842   nlnk = N+1;
4843   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4844 
4845   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4846   len  = ai[owners[rank+1]] - ai[owners[rank]];
4847   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4848 
4849   current_space = free_space;
4850 
4851   /* determine symbolic info for each local row */
4852   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4853 
4854   for (k=0; k<merge->nrecv; k++) {
4855     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4856     nrows       = *buf_ri_k[k];
4857     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4858     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4859   }
4860 
4861   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4862   len  = 0;
4863   for (i=0; i<m; i++) {
4864     bnzi = 0;
4865     /* add local non-zero cols of this proc's seqmat into lnk */
4866     arow  = owners[rank] + i;
4867     anzi  = ai[arow+1] - ai[arow];
4868     aj    = a->j + ai[arow];
4869     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4870     bnzi += nlnk;
4871     /* add received col data into lnk */
4872     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4873       if (i == *nextrow[k]) { /* i-th row */
4874         anzi  = *(nextai[k]+1) - *nextai[k];
4875         aj    = buf_rj[k] + *nextai[k];
4876         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4877         bnzi += nlnk;
4878         nextrow[k]++; nextai[k]++;
4879       }
4880     }
4881     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4882 
4883     /* if free space is not available, make more free space */
4884     if (current_space->local_remaining<bnzi) {
4885       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4886       nspacedouble++;
4887     }
4888     /* copy data into free space, then initialize lnk */
4889     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4890     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4891 
4892     current_space->array           += bnzi;
4893     current_space->local_used      += bnzi;
4894     current_space->local_remaining -= bnzi;
4895 
4896     bi[i+1] = bi[i] + bnzi;
4897   }
4898 
4899   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4900 
4901   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4902   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4903   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4904 
4905   /* create symbolic parallel matrix B_mpi */
4906   /*---------------------------------------*/
4907   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4908   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4909   if (n==PETSC_DECIDE) {
4910     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4911   } else {
4912     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4913   }
4914   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4915   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4916   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4917   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4918   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4919 
4920   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4921   B_mpi->assembled    = PETSC_FALSE;
4922   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4923   merge->bi           = bi;
4924   merge->bj           = bj;
4925   merge->buf_ri       = buf_ri;
4926   merge->buf_rj       = buf_rj;
4927   merge->coi          = NULL;
4928   merge->coj          = NULL;
4929   merge->owners_co    = NULL;
4930 
4931   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4932 
4933   /* attach the supporting struct to B_mpi for reuse */
4934   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4935   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4936   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4937   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4938   *mpimat = B_mpi;
4939 
4940   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4941   PetscFunctionReturn(0);
4942 }
4943 
4944 #undef __FUNCT__
4945 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4946 /*@C
4947       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4948                  matrices from each processor
4949 
4950     Collective on MPI_Comm
4951 
4952    Input Parameters:
4953 +    comm - the communicator the parallel matrix will live on
4954 .    seqmat - the input sequential matrices
4955 .    m - number of local rows (or PETSC_DECIDE)
4956 .    n - number of local columns (or PETSC_DECIDE)
4957 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 
4959    Output Parameter:
4960 .    mpimat - the parallel matrix generated
4961 
4962     Level: advanced
4963 
4964    Notes:
4965      The dimensions of the sequential matrix in each processor MUST be the same.
4966      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4967      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
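
     A typical call, where A_seq is the local SeqAIJ contribution assumed to exist
     on every process (a sketch; error checking omitted):

.vb
     Mat A_mpi;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve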
4968 @*/
4969 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4970 {
4971   PetscErrorCode ierr;
4972   PetscMPIInt    size;
4973 
4974   PetscFunctionBegin;
4975   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4976   if (size == 1) {
4977     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4978     if (scall == MAT_INITIAL_MATRIX) {
4979       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4980     } else {
4981       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4982     }
4983     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984     PetscFunctionReturn(0);
4985   }
4986   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4987   if (scall == MAT_INITIAL_MATRIX) {
4988     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4989   }
4990   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4991   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 #undef __FUNCT__
4996 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4997 /*@
4998      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4999           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5000           with MatGetSize().
5001 
5002     Not Collective
5003 
5004    Input Parameters:
5005 +    A - the matrix
5006 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5007 
5008    Output Parameter:
5009 .    A_loc - the local sequential matrix generated
5010 
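   Example usage (a minimal sketch; error checking omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... work with the local rows of A through A_loc ... */
     MatDestroy(&A_loc);
.ve
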
5011     Level: developer
5012 
5013 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5014 
5015 @*/
5016 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5017 {
5018   PetscErrorCode ierr;
5019   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5020   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5021   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5022   MatScalar      *aa=a->a,*ba=b->a,*cam;
5023   PetscScalar    *ca;
5024   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5025   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5026   PetscBool      match;
5027 
5028   PetscFunctionBegin;
5029   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5030   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5031   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5032   if (scall == MAT_INITIAL_MATRIX) {
5033     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5034     ci[0] = 0;
5035     for (i=0; i<am; i++) {
5036       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5037     }
5038     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5039     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5040     k    = 0;
5041     for (i=0; i<am; i++) {
5042       ncols_o = bi[i+1] - bi[i];
5043       ncols_d = ai[i+1] - ai[i];
5044       /* off-diagonal portion of A */
5045       for (jo=0; jo<ncols_o; jo++) {
5046         col = cmap[*bj];
5047         if (col >= cstart) break;
5048         cj[k]   = col; bj++;
5049         ca[k++] = *ba++;
5050       }
5051       /* diagonal portion of A */
5052       for (j=0; j<ncols_d; j++) {
5053         cj[k]   = cstart + *aj++;
5054         ca[k++] = *aa++;
5055       }
5056       /* off-diagonal portion of A */
5057       for (j=jo; j<ncols_o; j++) {
5058         cj[k]   = cmap[*bj++];
5059         ca[k++] = *ba++;
5060       }
5061     }
5062     /* put together the new matrix */
5063     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5064     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5065     /* Since these are PETSc arrays, change flags to free them as necessary. */
5066     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5067     mat->free_a  = PETSC_TRUE;
5068     mat->free_ij = PETSC_TRUE;
5069     mat->nonew   = 0;
5070   } else if (scall == MAT_REUSE_MATRIX) {
5071     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5072     ci = mat->i; cj = mat->j; cam = mat->a;
5073     for (i=0; i<am; i++) {
5074       /* off-diagonal portion of A */
5075       ncols_o = bi[i+1] - bi[i];
5076       for (jo=0; jo<ncols_o; jo++) {
5077         col = cmap[*bj];
5078         if (col >= cstart) break;
5079         *cam++ = *ba++; bj++;
5080       }
5081       /* diagonal portion of A */
5082       ncols_d = ai[i+1] - ai[i];
5083       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5084       /* off-diagonal portion of A */
5085       for (j=jo; j<ncols_o; j++) {
5086         *cam++ = *ba++; bj++;
5087       }
5088     }
5089   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5090   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5096 /*@C
5097      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    row, col - index sets of rows and columns to extract (or NULL)
5105 
5106    Output Parameter:
5107 .    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5112 
5113 @*/
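/*
   Example usage (a minimal sketch, not taken from the PETSc test suite; it assumes an assembled
   MATMPIAIJ matrix A):

       Mat A_loc;
       ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
       ... A_loc now contains the local rows of A restricted to its locally nonzero columns ...
       ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

   Passing NULL for row and col lets the routine build the index sets itself; index sets supplied
   by the caller are used as given and are not destroyed by this routine.
*/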
5114 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5115 {
5116   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode ierr;
5118   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5119   IS             isrowa,iscola;
5120   Mat            *aloc;
5121   PetscBool      match;
5122 
5123   PetscFunctionBegin;
5124   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5125   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5126   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   if (!row) {
5128     start = A->rmap->rstart; end = A->rmap->rend;
5129     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5130   } else {
5131     isrowa = *row;
5132   }
5133   if (!col) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5148   } else {
5149     iscola = *col;
5150   }
5151   if (scall != MAT_INITIAL_MATRIX) {
5152     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5153     aloc[0] = *A_loc;
5154   }
5155   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5156   *A_loc = aloc[0];
5157   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5158   if (!row) {
5159     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5160   }
5161   if (!col) {
5162     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5163   }
5164   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 #undef __FUNCT__
5169 #define __FUNCT__ "MatGetBrowsOfAcols"
5170 /*@C
5171     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5172 
5173     Collective on Mat
5174 
5175    Input Parameters:
5176 +    A,B - the matrices in mpiaij format
5177 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5178 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5179 
5180    Output Parameters:
5181 +    rowb, colb - index sets of rows and columns of B to extract
5182 -    B_seq - the sequential matrix generated
5183 
5184     Level: developer
5185 
5186 @*/
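/*
   Example usage (a minimal sketch, not taken from the PETSc test suite; A and B are assumed to be
   assembled MATMPIAIJ matrices with compatible layouts):

       IS  rowb = NULL,colb = NULL;
       Mat B_seq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ... later, after the values (but not the structure) of B have changed ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(&rowb);CHKERRQ(ierr);
       ierr = ISDestroy(&colb);CHKERRQ(ierr);
       ierr = MatDestroy(&B_seq);CHKERRQ(ierr);

   The index sets returned by the MAT_INITIAL_MATRIX call must be kept and passed back for
   MAT_REUSE_MATRIX; the caller is responsible for destroying them.
*/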
5187 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5188 {
5189   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5190   PetscErrorCode ierr;
5191   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5192   IS             isrowb,iscolb;
5193   Mat            *bseq=NULL;
5194 
5195   PetscFunctionBegin;
5196   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5197     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5198   }
5199   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5200 
5201   if (scall == MAT_INITIAL_MATRIX) {
5202     start = A->cmap->rstart;
5203     cmap  = a->garray;
5204     nzA   = a->A->cmap->n;
5205     nzB   = a->B->cmap->n;
5206     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5207     ncols = 0;
5208     for (i=0; i<nzB; i++) {  /* row < local row index */
5209       if (cmap[i] < start) idx[ncols++] = cmap[i];
5210       else break;
5211     }
5212     imark = i;
5213     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5214     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5215     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5216     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5217   } else {
5218     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5219     isrowb  = *rowb; iscolb = *colb;
5220     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5221     bseq[0] = *B_seq;
5222   }
5223   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5224   *B_seq = bseq[0];
5225   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5226   if (!rowb) {
5227     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5228   } else {
5229     *rowb = isrowb;
5230   }
5231   if (!colb) {
5232     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5233   } else {
5234     *colb = iscolb;
5235   }
5236   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5237   PetscFunctionReturn(0);
5238 }
5239 
5240 #undef __FUNCT__
5241 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5242 /*
5243     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5244     of the OFF-DIAGONAL portion of local A
5245 
5246     Collective on Mat
5247 
5248    Input Parameters:
5249 +    A,B - the matrices in mpiaij format
5250 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5251 
5252    Output Parameters:
5253 +    startsj_s - starting points in B's sending j-arrays, saved for use with MAT_REUSE_MATRIX (or NULL)
5254 .    startsj_r - starting points in B's receiving j-arrays, saved for use with MAT_REUSE_MATRIX (or NULL)
5255 .    bufa_ptr - array for sending matrix values, saved for use with MAT_REUSE_MATRIX (or NULL)
5256 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5257 
5258     Level: developer
5259 
5260 */
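/*
   Example usage (a minimal sketch, not taken from the PETSc test suite; A and B are assumed to be
   assembled MATMPIAIJ matrices with compatible layouts; the saved arrays make the MAT_REUSE_MATRIX
   call cheap when only the values of B change):

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa      = NULL;
       Mat       B_oth;
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/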
5261 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5262 {
5263   VecScatter_MPI_General *gen_to,*gen_from;
5264   PetscErrorCode         ierr;
5265   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5266   Mat_SeqAIJ             *b_oth;
5267   VecScatter             ctx =a->Mvctx;
5268   MPI_Comm               comm;
5269   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5270   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5271   PetscScalar            *rvalues,*svalues;
5272   MatScalar              *b_otha,*bufa,*bufA;
5273   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5274   MPI_Request            *rwaits = NULL,*swaits = NULL;
5275   MPI_Status             *sstatus,rstatus;
5276   PetscMPIInt            jj;
5277   PetscInt               *cols,sbs,rbs;
5278   PetscScalar            *vals;
5279 
5280   PetscFunctionBegin;
5281   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5282   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5283     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5284   }
5285   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5286   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5287 
5288   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5289   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5290   rvalues  = gen_from->values; /* holds the length of receiving row */
5291   svalues  = gen_to->values;   /* holds the length of sending row */
5292   nrecvs   = gen_from->n;
5293   nsends   = gen_to->n;
5294 
5295   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5296   srow    = gen_to->indices;    /* local row index to be sent */
5297   sstarts = gen_to->starts;
5298   sprocs  = gen_to->procs;
5299   sstatus = gen_to->sstatus;
5300   sbs     = gen_to->bs;
5301   rstarts = gen_from->starts;
5302   rprocs  = gen_from->procs;
5303   rbs     = gen_from->bs;
5304 
5305   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5306   if (scall == MAT_INITIAL_MATRIX) {
5307     /* i-array */
5308     /*---------*/
5309     /*  post receives */
5310     for (i=0; i<nrecvs; i++) {
5311       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5312       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5313       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* pack the outgoing message */
5317     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5318 
5319     sstartsj[0] = 0;
5320     rstartsj[0] = 0;
5321     len         = 0; /* total length of j or a array to be sent */
5322     k           = 0;
5323     for (i=0; i<nsends; i++) {
5324       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5325       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326       for (j=0; j<nrows; j++) {
5327         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5328         for (l=0; l<sbs; l++) {
5329           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5330 
5331           rowlen[j*sbs+l] = ncols;
5332 
5333           len += ncols;
5334           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5335         }
5336         k++;
5337       }
5338       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5339 
5340       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5341     }
5342     /* recvs and sends of i-array are completed */
5343     i = nrecvs;
5344     while (i--) {
5345       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5346     }
5347     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5348 
5349     /* allocate buffers for sending j and a arrays */
5350     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5351     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5352 
5353     /* create i-array of B_oth */
5354     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5355 
5356     b_othi[0] = 0;
5357     len       = 0; /* total length of j or a array to be received */
5358     k         = 0;
5359     for (i=0; i<nrecvs; i++) {
5360       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5361       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5362       for (j=0; j<nrows; j++) {
5363         b_othi[k+1] = b_othi[k] + rowlen[j];
5364         len        += rowlen[j]; k++;
5365       }
5366       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5367     }
5368 
5369 
5370     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5371     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5372 
5373     /* j-array */
5374     /*---------*/
5375     /*  post receives of j-array */
5376     for (i=0; i<nrecvs; i++) {
5377       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5378       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5379     }
5380 
5381     /* pack the outgoing message j-array */
5382     k = 0;
5383     for (i=0; i<nsends; i++) {
5384       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5385       bufJ  = bufj+sstartsj[i];
5386       for (j=0; j<nrows; j++) {
5387         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5388         for (ll=0; ll<sbs; ll++) {
5389           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5390           for (l=0; l<ncols; l++) {
5391             *bufJ++ = cols[l];
5392           }
5393           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5394         }
5395       }
5396       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5397     }
5398 
5399     /* recvs and sends of j-array are completed */
5400     i = nrecvs;
5401     while (i--) {
5402       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5403     }
5404     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5405   } else if (scall == MAT_REUSE_MATRIX) {
5406     sstartsj = *startsj_s;
5407     rstartsj = *startsj_r;
5408     bufa     = *bufa_ptr;
5409     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5410     b_otha   = b_oth->a;
5411   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5412 
5413   /* a-array */
5414   /*---------*/
5415   /*  post receives of a-array */
5416   for (i=0; i<nrecvs; i++) {
5417     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5418     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5419   }
5420 
5421   /* pack the outgoing message a-array */
5422   k = 0;
5423   for (i=0; i<nsends; i++) {
5424     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5425     bufA  = bufa+sstartsj[i];
5426     for (j=0; j<nrows; j++) {
5427       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5428       for (ll=0; ll<sbs; ll++) {
5429         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5430         for (l=0; l<ncols; l++) {
5431           *bufA++ = vals[l];
5432         }
5433         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5434       }
5435     }
5436     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5437   }
5438   /* recvs and sends of a-array are completed */
5439   i = nrecvs;
5440   while (i--) {
5441     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5442   }
5443   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5444   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5445 
5446   if (scall == MAT_INITIAL_MATRIX) {
5447     /* put together the new matrix */
5448     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5449 
5450     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5451     /* Since these are PETSc arrays, change flags to free them as necessary. */
5452     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5453     b_oth->free_a  = PETSC_TRUE;
5454     b_oth->free_ij = PETSC_TRUE;
5455     b_oth->nonew   = 0;
5456 
5457     ierr = PetscFree(bufj);CHKERRQ(ierr);
5458     if (!startsj_s || !bufa_ptr) {
5459       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5460       ierr = PetscFree(bufa);CHKERRQ(ierr);
5461     } else {
5462       *startsj_s = sstartsj;
5463       *startsj_r = rstartsj;
5464       *bufa_ptr  = bufa;
5465     }
5466   }
5467   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5468   PetscFunctionReturn(0);
5469 }
5470 
5471 #undef __FUNCT__
5472 #define __FUNCT__ "MatGetCommunicationStructs"
5473 /*@C
5474   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5475 
5476   Not Collective
5477 
5478   Input Parameter:
5479 . A - the matrix in MPIAIJ format
5480 
5481   Output Parameters:
5482 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5483 . colmap - A map from global column index to local index into lvec
5484 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5485 
5486   Level: developer
5487 
5488 @*/
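/*
   Example usage (a minimal sketch, not taken from the PETSc test suite; A is assumed to be an
   assembled MATMPIAIJ matrix; the type of colmap depends on whether PETSC_USE_CTABLE is defined):

       Vec        lvec;
       VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are owned by the matrix; the caller must not destroy them.
*/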
5489 #if defined(PETSC_USE_CTABLE)
5490 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5491 #else
5492 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5493 #endif
5494 {
5495   Mat_MPIAIJ *a;
5496 
5497   PetscFunctionBegin;
5498   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5499   PetscValidPointer(lvec, 2);
5500   PetscValidPointer(colmap, 3);
5501   PetscValidPointer(multScatter, 4);
5502   a = (Mat_MPIAIJ*) A->data;
5503   if (lvec) *lvec = a->lvec;
5504   if (colmap) *colmap = a->colmap;
5505   if (multScatter) *multScatter = a->Mvctx;
5506   PetscFunctionReturn(0);
5507 }
5508 
5509 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5510 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5511 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5512 
5513 #undef __FUNCT__
5514 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5515 /*
5516     Computes C = A*B as (B'*A')', since forming the dense-times-sparse product A*B directly is untenable
5517 
5518                n                       p                          p
5519         (              )       (              )         (                  )
5520       m (      A       )  *  n (       B      )   =   m (         C        )
5521         (              )       (              )         (                  )
5522 
5523 */
5524 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5525 {
5526   PetscErrorCode ierr;
5527   Mat            At,Bt,Ct;
5528 
5529   PetscFunctionBegin;
5530   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5531   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5532   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5533   ierr = MatDestroy(&At);CHKERRQ(ierr);
5534   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5535   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5536   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5537   PetscFunctionReturn(0);
5538 }
5539 
5540 #undef __FUNCT__
5541 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5542 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5543 {
5544   PetscErrorCode ierr;
5545   PetscInt       m=A->rmap->n,n=B->cmap->n;
5546   Mat            Cmat;
5547 
5548   PetscFunctionBegin;
5549   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5550   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5551   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5552   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5553   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5554   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5555   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5556   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5557 
5558   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5559 
5560   *C = Cmat;
5561   PetscFunctionReturn(0);
5562 }
5563 
5564 /* ----------------------------------------------------------------*/
5565 #undef __FUNCT__
5566 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5567 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5568 {
5569   PetscErrorCode ierr;
5570 
5571   PetscFunctionBegin;
5572   if (scall == MAT_INITIAL_MATRIX) {
5573     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5574   }
5575   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 #if defined(PETSC_HAVE_MUMPS)
5580 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5581 #endif
5582 #if defined(PETSC_HAVE_PASTIX)
5583 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5584 #endif
5585 #if defined(PETSC_HAVE_SUPERLU_DIST)
5586 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5587 #endif
5588 #if defined(PETSC_HAVE_CLIQUE)
5589 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5590 #endif
5591 
5592 /*MC
5593    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5594 
5595    Options Database Keys:
5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5597 
5598   Level: beginner
5599 
5600 .seealso: MatCreateAIJ()
5601 M*/
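/*
   Example usage (a minimal sketch, not taken from the PETSc test suite): creating an MPIAIJ matrix
   explicitly; comm, m, n, M, N and the preallocation numbers are placeholders chosen by the caller:

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
       ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/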
5602 
5603 #undef __FUNCT__
5604 #define __FUNCT__ "MatCreate_MPIAIJ"
5605 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5606 {
5607   Mat_MPIAIJ     *b;
5608   PetscErrorCode ierr;
5609   PetscMPIInt    size;
5610 
5611   PetscFunctionBegin;
5612   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5613 
5614   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5615   B->data       = (void*)b;
5616   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5617   B->assembled  = PETSC_FALSE;
5618   B->insertmode = NOT_SET_VALUES;
5619   b->size       = size;
5620 
5621   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5622 
5623   /* build cache for off array entries formed */
5624   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5625 
5626   b->donotstash  = PETSC_FALSE;
5627   b->colmap      = 0;
5628   b->garray      = 0;
5629   b->roworiented = PETSC_TRUE;
5630 
5631   /* stuff used for matrix vector multiply */
5632   b->lvec  = NULL;
5633   b->Mvctx = NULL;
5634 
5635   /* stuff for MatGetRow() */
5636   b->rowindices   = 0;
5637   b->rowvalues    = 0;
5638   b->getrowactive = PETSC_FALSE;
5639 
5640   /* flexible pointer used in CUSP/CUSPARSE classes */
5641   b->spptr = NULL;
5642 
5643 #if defined(PETSC_HAVE_MUMPS)
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5645 #endif
5646 #if defined(PETSC_HAVE_PASTIX)
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5648 #endif
5649 #if defined(PETSC_HAVE_SUPERLU_DIST)
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5651 #endif
5652 #if defined(PETSC_HAVE_CLIQUE)
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C","MatGetFactor_aij_clique",MatGetFactor_aij_clique);CHKERRQ(ierr);
5654 #endif
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5669   PetscFunctionReturn(0);
5670 }
5671 
5672 #undef __FUNCT__
5673 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5674 /*@
5675      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5676          and "off-diagonal" part of the matrix in CSR format.
5677 
5678    Collective on MPI_Comm
5679 
5680    Input Parameters:
5681 +  comm - MPI communicator
5682 .  m - number of local rows (cannot be PETSC_DECIDE)
5683 .  n - number of local columns; this value should be the same as the local size used in creating the
5684        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
5685        For square matrices n is almost always m.
5686 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5687 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5688 .   i - row indices for "diagonal" portion of matrix
5689 .   j - column indices
5690 .   a - matrix values
5691 .   oi - row indices for "off-diagonal" portion of matrix
5692 .   oj - column indices
5693 -   oa - matrix values
5694 
5695    Output Parameter:
5696 .   mat - the matrix
5697 
5698    Level: advanced
5699 
5700    Notes:
5701        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5702        must free the arrays once the matrix has been destroyed and not before.
5703 
5704        The i and j indices are 0 based
5705 
5706        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5707 
5708        This sets local rows and cannot be used to set off-processor values.
5709 
5710        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5711        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5712        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5713        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5714        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5715        communication if it is known that only local entries will be set.
5716 
5717 .keywords: matrix, aij, compressed row, sparse, parallel
5718 
5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5720           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5721 @*/
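/*
   Example usage (a minimal sketch, not taken from the PETSc test suite): the arrays below describe the
   two rows owned by the first of two processes of a 4x4 matrix; the diagonal block uses local column
   indices, the off-diagonal block uses global column indices, and the second process would supply its
   own arrays in the same format:

       PetscInt    i[]  = {0,1,2};          row pointers of the 2x2 diagonal block
       PetscInt    j[]  = {0,1};            local column indices of the diagonal block
       PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,1,1};          row pointers of the off-diagonal block
       PetscInt    oj[] = {3};              global column index of the single off-diagonal entry
       PetscScalar oa[] = {5.0};
       Mat         A;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Because the arrays are not copied, they must remain valid until A is destroyed.
*/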
5722 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5723 {
5724   PetscErrorCode ierr;
5725   Mat_MPIAIJ     *maij;
5726 
5727   PetscFunctionBegin;
5728   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5729   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5730   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5731   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5732   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5733   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5734   maij = (Mat_MPIAIJ*) (*mat)->data;
5735 
5736   (*mat)->preallocated = PETSC_TRUE;
5737 
5738   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5739   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5740 
5741   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5742   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5743 
5744   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5748 
5749   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5750   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5752   PetscFunctionReturn(0);
5753 }
5754 
5755 /*
5756     Special version for direct calls from Fortran
5757 */
5758 #include <petsc-private/fortranimpl.h>
5759 
5760 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5761 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5762 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5763 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5764 #endif
5765 
5766 /* Change these macros so can be used in void function */
5767 #undef CHKERRQ
5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5769 #undef SETERRQ2
5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5771 #undef SETERRQ3
5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5773 #undef SETERRQ
5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5775 
5776 #undef __FUNCT__
5777 #define __FUNCT__ "matsetvaluesmpiaij_"
5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5779 {
5780   Mat            mat  = *mmat;
5781   PetscInt       m    = *mm, n = *mn;
5782   InsertMode     addv = *maddv;
5783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5784   PetscScalar    value;
5785   PetscErrorCode ierr;
5786 
5787   MatCheckPreallocated(mat,1);
5788   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5789 
5790 #if defined(PETSC_USE_DEBUG)
5791   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5792 #endif
5793   {
5794     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5795     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5796     PetscBool roworiented = aij->roworiented;
5797 
5798     /* Some Variables required in the macro */
5799     Mat        A                 = aij->A;
5800     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5801     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5802     MatScalar  *aa               = a->a;
5803     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5804     Mat        B                 = aij->B;
5805     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5806     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5807     MatScalar  *ba               = b->a;
5808 
5809     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5810     PetscInt  nonew = a->nonew;
5811     MatScalar *ap1,*ap2;
5812 
5813     PetscFunctionBegin;
5814     for (i=0; i<m; i++) {
5815       if (im[i] < 0) continue;
5816 #if defined(PETSC_USE_DEBUG)
5817       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5818 #endif
5819       if (im[i] >= rstart && im[i] < rend) {
5820         row      = im[i] - rstart;
5821         lastcol1 = -1;
5822         rp1      = aj + ai[row];
5823         ap1      = aa + ai[row];
5824         rmax1    = aimax[row];
5825         nrow1    = ailen[row];
5826         low1     = 0;
5827         high1    = nrow1;
5828         lastcol2 = -1;
5829         rp2      = bj + bi[row];
5830         ap2      = ba + bi[row];
5831         rmax2    = bimax[row];
5832         nrow2    = bilen[row];
5833         low2     = 0;
5834         high2    = nrow2;
5835 
5836         for (j=0; j<n; j++) {
5837           if (roworiented) value = v[i*n+j];
5838           else value = v[i+j*m];
5839           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5840           if (in[j] >= cstart && in[j] < cend) {
5841             col = in[j] - cstart;
5842             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5843           } else if (in[j] < 0) continue;
5844 #if defined(PETSC_USE_DEBUG)
5845           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5846 #endif
5847           else {
5848             if (mat->was_assembled) {
5849               if (!aij->colmap) {
5850                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5851               }
5852 #if defined(PETSC_USE_CTABLE)
5853               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5854               col--;
5855 #else
5856               col = aij->colmap[in[j]] - 1;
5857 #endif
5858               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5859                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5860                 col  =  in[j];
5861                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5862                 B     = aij->B;
5863                 b     = (Mat_SeqAIJ*)B->data;
5864                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5865                 rp2   = bj + bi[row];
5866                 ap2   = ba + bi[row];
5867                 rmax2 = bimax[row];
5868                 nrow2 = bilen[row];
5869                 low2  = 0;
5870                 high2 = nrow2;
5871                 bm    = aij->B->rmap->n;
5872                 ba    = b->a;
5873               }
5874             } else col = in[j];
5875             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5876           }
5877         }
5878       } else if (!aij->donotstash) {
5879         if (roworiented) {
5880           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5881         } else {
5882           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5883         }
5884       }
5885     }
5886   }
5887   PetscFunctionReturnVoid();
5888 }
5889 
5890