xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c79c55279c2f638f1e2600b27e73cde5e9f73b5c)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatDestroy_MatRedundant"
2512 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2513 {
2514   PetscErrorCode ierr;
2515   Mat_Redundant  *redund;
2516   PetscInt       i;
2517   PetscMPIInt    size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   if (size == 1) {
2522     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523     redund = a->redundant;
2524   } else {
2525     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2526     redund = a->redundant;
2527   }
2528   if (redund){
2529     if (redund->matseq) { /* via MatGetSubMatrices()  */
2530       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2531       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2532       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2533       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2534     } else {
2535       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2536       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2537       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2538       for (i=0; i<redund->nrecvs; i++) {
2539         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2540         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2541       }
2542       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2543     }
2544 
2545     if (redund->psubcomm) {
2546       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2547     }
2548     ierr = redund->Destroy(A);CHKERRQ(ierr);
2549     ierr = PetscFree(redund);CHKERRQ(ierr);
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm"
2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2557 {
2558   PetscMPIInt    rank,size;
2559   MPI_Comm       comm,subcomm=psubcomm->comm;
2560   PetscErrorCode ierr;
2561   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2562   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2563   PetscInt       *rowrange = mat->rmap->range;
2564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2565   Mat            A = aij->A,B=aij->B,C=*matredundant;
2566   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2567   PetscScalar    *sbuf_a;
2568   PetscInt       nzlocal=a->nz+b->nz;
2569   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2570   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2571   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2572   MatScalar      *aworkA,*aworkB;
2573   PetscScalar    *vals;
2574   PetscMPIInt    tag1,tag2,tag3,imdex;
2575   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2576   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2577   MPI_Status     recv_status,*send_status;
2578   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2579   PetscInt       **rbuf_j=NULL;
2580   PetscScalar    **rbuf_a=NULL;
2581   Mat_Redundant  *redund =NULL;
2582   PetscBool      flg=PETSC_FALSE;
2583 
2584   PetscFunctionBegin;
2585   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2588   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2589   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2590 
2591   /* ---------- new imples: use MatGetSubMatrices() ------------*/
2592   ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr);
2593   if (flg) {
2594     Mat        *matseq;
2595     IS         isrow,iscol;
2596     PetscInt   mloc_sub,rstart,rend;
2597 
2598     if (reuse == MAT_INITIAL_MATRIX) {
2599       /* create a local sequential matrix matseq[0] */
2600       mloc_sub = PETSC_DECIDE;
2601       ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2602       ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2603       rstart = rend - mloc_sub;
2604       /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */
2605       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2606       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2607     } else { /* reuse == MAT_REUSE_MATRIX */
2608       if (subsize == 1) {
2609         Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2610         redund = c->redundant;
2611       } else {
2612         Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2613         redund = c->redundant;
2614       }
2615 
2616       isrow = redund->isrow;
2617       iscol = redund->iscol;
2618       matseq = redund->matseq;
2619     }
2620 
2621     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2622     /*
2623     if (rank==0) {
2624       printf("[%d] matsub:\n",rank);
2625       ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2626     }
2627     ierr = MPI_Barrier(comm);CHKERRQ(ierr);
2628      */
2629 
2630     /* Create matredundant by concatenating matseq[0] from processors in this subcomm */
2631     /*
2632     if (reuse == MAT_REUSE_MATRIX) {
2633       if (!rank) printf("matredundant:\n");
2634       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2635     }
2636      */
2637     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2638     /*
2639     if (nsubcomm == 1) {
2640       if (!rank) printf( "matredundant\n");
2641       ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
2642      } */
2643 
2644     if (reuse == MAT_INITIAL_MATRIX) {
2645       /* create a supporting struct and attach it to C for reuse */
2646       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2647       if (subsize == 1) {
2648         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2649         c->redundant = redund;
2650       } else {
2651         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2652         c->redundant = redund;
2653       }
2654 
2655       redund->isrow     = isrow;
2656       redund->iscol     = iscol;
2657       redund->matseq    = matseq;
2658       redund->psubcomm  = NULL;
2659 
2660       redund->Destroy               = (*matredundant)->ops->destroy;
2661       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2662     }
2663     PetscFunctionReturn(0);
2664   }
2665   /* ----------------------------------------------------*/
2666 
2667   if (reuse == MAT_REUSE_MATRIX) {
2668     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2669     if (subsize == 1) {
2670       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2671       redund = c->redundant;
2672     } else {
2673       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2674       redund = c->redundant;
2675     }
2676     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2677 
2678     nsends    = redund->nsends;
2679     nrecvs    = redund->nrecvs;
2680     send_rank = redund->send_rank;
2681     recv_rank = redund->recv_rank;
2682     sbuf_nz   = redund->sbuf_nz;
2683     rbuf_nz   = redund->rbuf_nz;
2684     sbuf_j    = redund->sbuf_j;
2685     sbuf_a    = redund->sbuf_a;
2686     rbuf_j    = redund->rbuf_j;
2687     rbuf_a    = redund->rbuf_a;
2688   }
2689 
2690   if (reuse == MAT_INITIAL_MATRIX) {
2691     PetscInt    nleftover,np_subcomm;
2692 
2693     /* get the destination processors' id send_rank, nsends and nrecvs */
2694     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2695 
2696     np_subcomm = size/nsubcomm;
2697     nleftover  = size - nsubcomm*np_subcomm;
2698 
2699     nsends = 0; nrecvs = 0;
2700     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2701       /* -------------------------------------------*/
2702       for (i=0; i<size; i++) {
2703         if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2704           send_rank[nsends]   = i; nsends++;
2705           recv_rank[nrecvs++] = i;
2706           /* printf("[%d] send to and recv from [%d]\n",rank,i); */
2707         }
2708       }
2709       if (rank >= size - nleftover) { /* this proc is a leftover processor */
2710         i = size-nleftover-1;
2711         j = 0;
2712         while (j < nsubcomm - nleftover) {
2713           send_rank[nsends++] = i;
2714           i--; j++;
2715           /* printf("[%d] send to [%d]\n",rank,i); */
2716         }
2717       }
2718 
2719       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2720         for (i=0; i<nleftover; i++) {
2721           recv_rank[nrecvs++] = size-nleftover+i;
2722           /* printf("[%d] recv from [%d]\n",rank,i); */
2723         }
2724       }
2725     } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) {
2726       /* --------------------------------------------------*/
2727       PetscInt color,subcommstart;
2728       subcommstart=0;
2729       for (color=0; color<nsubcomm; color++) {
2730         if (psubcomm->color != color) {
2731           for (i=0; i<psubcomm->subsize[color]; i++) {
2732             if (subrank == i) { /* my_subrank == other's subrank */
2733               send_rank[nsends++] = subcommstart+i;
2734               recv_rank[nrecvs++] = subcommstart+i;
2735               /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */
2736             }
2737           }
2738         }
2739         subcommstart += psubcomm->subsize[color];
2740       }
2741       if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */
2742         subcommstart=0;
2743         for (color=0; color<nsubcomm; color++) {
2744           subcommstart += psubcomm->subsize[color];
2745           if (psubcomm->color == color) continue;
2746           if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */
2747             send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */
2748             /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */
2749           }
2750         }
2751       }
2752 
2753       if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2754         subcommstart=0;
2755         for (color=0; color<nsubcomm; color++) {
2756           subcommstart += psubcomm->subsize[color];
2757           if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */
2758             recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */
2759             /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */
2760           }
2761         }
2762       }
2763     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type);
2764 
2765     /* allocate sbuf_j, sbuf_a */
2766     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2767     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2768     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2769     /*
2770     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2771     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2772      */
2773   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2774 
2775   /* copy mat's local entries into the buffers */
2776   if (reuse == MAT_INITIAL_MATRIX) {
2777     rownz_max = 0;
2778     rptr      = sbuf_j;
2779     cols      = sbuf_j + rend-rstart + 1;
2780     vals      = sbuf_a;
2781     rptr[0]   = 0;
2782     for (i=0; i<rend-rstart; i++) {
2783       row    = i + rstart;
2784       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2785       ncols  = nzA + nzB;
2786       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2787       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2788       /* load the column indices for this row into cols */
2789       lwrite = 0;
2790       for (l=0; l<nzB; l++) {
2791         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2792           vals[lwrite]   = aworkB[l];
2793           cols[lwrite++] = ctmp;
2794         }
2795       }
2796       for (l=0; l<nzA; l++) {
2797         vals[lwrite]   = aworkA[l];
2798         cols[lwrite++] = cstart + cworkA[l];
2799       }
2800       for (l=0; l<nzB; l++) {
2801         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2802           vals[lwrite]   = aworkB[l];
2803           cols[lwrite++] = ctmp;
2804         }
2805       }
2806       vals     += ncols;
2807       cols     += ncols;
2808       rptr[i+1] = rptr[i] + ncols;
2809       if (rownz_max < ncols) rownz_max = ncols;
2810     }
2811     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2812   } else { /* only copy matrix values into sbuf_a */
2813     rptr    = sbuf_j;
2814     vals    = sbuf_a;
2815     rptr[0] = 0;
2816     for (i=0; i<rend-rstart; i++) {
2817       row    = i + rstart;
2818       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2819       ncols  = nzA + nzB;
2820       cworkB = b->j + b->i[i];
2821       aworkA = a->a + a->i[i];
2822       aworkB = b->a + b->i[i];
2823       lwrite = 0;
2824       for (l=0; l<nzB; l++) {
2825         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2826       }
2827       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2828       for (l=0; l<nzB; l++) {
2829         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2830       }
2831       vals     += ncols;
2832       rptr[i+1] = rptr[i] + ncols;
2833     }
2834   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2835 
2836   /* send nzlocal to others, and recv other's nzlocal */
2837   /*--------------------------------------------------*/
2838   if (reuse == MAT_INITIAL_MATRIX) {
2839     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2840 
2841     s_waits2 = s_waits3 + nsends;
2842     s_waits1 = s_waits2 + nsends;
2843     r_waits1 = s_waits1 + nsends;
2844     r_waits2 = r_waits1 + nrecvs;
2845     r_waits3 = r_waits2 + nrecvs;
2846   } else {
2847     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2848 
2849     r_waits3 = s_waits3 + nsends;
2850   }
2851 
2852   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2853   if (reuse == MAT_INITIAL_MATRIX) {
2854     /* get new tags to keep the communication clean */
2855     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2856     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2857     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2858 
2859     /* post receives of other's nzlocal */
2860     for (i=0; i<nrecvs; i++) {
2861       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2862     }
2863     /* send nzlocal to others */
2864     for (i=0; i<nsends; i++) {
2865       sbuf_nz[i] = nzlocal;
2866       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2867     }
2868     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2869     count = nrecvs;
2870     while (count) {
2871       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2872 
2873       recv_rank[imdex] = recv_status.MPI_SOURCE;
2874       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2875       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2876 
2877       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2878 
2879       rbuf_nz[imdex] += i + 2;
2880 
2881       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2882       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2883       count--;
2884     }
2885     /* wait on sends of nzlocal */
2886     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2887     /* send mat->i,j to others, and recv from other's */
2888     /*------------------------------------------------*/
2889     for (i=0; i<nsends; i++) {
2890       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2891       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2892     }
2893     /* wait on receives of mat->i,j */
2894     /*------------------------------*/
2895     count = nrecvs;
2896     while (count) {
2897       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2898       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2899       count--;
2900     }
2901     /* wait on sends of mat->i,j */
2902     /*---------------------------*/
2903     if (nsends) {
2904       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2905     }
2906   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2907 
2908   /* post receives, send and receive mat->a */
2909   /*----------------------------------------*/
2910   for (imdex=0; imdex<nrecvs; imdex++) {
2911     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2912   }
2913   for (i=0; i<nsends; i++) {
2914     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2915   }
2916   count = nrecvs;
2917   while (count) {
2918     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2919     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2920     count--;
2921   }
2922   if (nsends) {
2923     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2924   }
2925 
2926   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2927 
2928   /* create redundant matrix */
2929   /*-------------------------*/
2930   if (reuse == MAT_INITIAL_MATRIX) {
2931     const PetscInt *range;
2932     PetscInt       rstart_sub,rend_sub,mloc_sub;
2933 
2934     /* compute rownz_max for preallocation */
2935     for (imdex=0; imdex<nrecvs; imdex++) {
2936       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2937       rptr = rbuf_j[imdex];
2938       for (i=0; i<j; i++) {
2939         ncols = rptr[i+1] - rptr[i];
2940         if (rownz_max < ncols) rownz_max = ncols;
2941       }
2942     }
2943 
2944     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2945 
2946     /* get local size of redundant matrix
2947        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2948     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2949     rstart_sub = range[nsubcomm*subrank];
2950     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2951       rend_sub = range[nsubcomm*(subrank+1)];
2952     } else {
2953       rend_sub = mat->rmap->N;
2954     }
2955     mloc_sub = rend_sub - rstart_sub;
2956 
2957     if (M == N) {
2958       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2959     } else { /* non-square matrix */
2960       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2961     }
2962     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2963     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2964     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2965     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2966   } else {
2967     C = *matredundant;
2968   }
2969 
2970   /* insert local matrix entries */
2971   rptr = sbuf_j;
2972   cols = sbuf_j + rend-rstart + 1;
2973   vals = sbuf_a;
2974   for (i=0; i<rend-rstart; i++) {
2975     row   = i + rstart;
2976     ncols = rptr[i+1] - rptr[i];
2977     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2978     vals += ncols;
2979     cols += ncols;
2980   }
2981   /* insert received matrix entries */
2982   for (imdex=0; imdex<nrecvs; imdex++) {
2983     rstart = rowrange[recv_rank[imdex]];
2984     rend   = rowrange[recv_rank[imdex]+1];
2985     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2986     rptr   = rbuf_j[imdex];
2987     cols   = rbuf_j[imdex] + rend-rstart + 1;
2988     vals   = rbuf_a[imdex];
2989     for (i=0; i<rend-rstart; i++) {
2990       row   = i + rstart;
2991       ncols = rptr[i+1] - rptr[i];
2992       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2993       vals += ncols;
2994       cols += ncols;
2995     }
2996   }
2997   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999 
3000   if (reuse == MAT_INITIAL_MATRIX) {
3001     *matredundant = C;
3002 
3003     /* create a supporting struct and attach it to C for reuse */
3004     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3005     if (subsize == 1) {
3006       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
3007       c->redundant = redund;
3008     } else {
3009       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
3010       c->redundant = redund;
3011     }
3012 
3013     redund->nzlocal   = nzlocal;
3014     redund->nsends    = nsends;
3015     redund->nrecvs    = nrecvs;
3016     redund->send_rank = send_rank;
3017     redund->recv_rank = recv_rank;
3018     redund->sbuf_nz   = sbuf_nz;
3019     redund->rbuf_nz   = rbuf_nz;
3020     redund->sbuf_j    = sbuf_j;
3021     redund->sbuf_a    = sbuf_a;
3022     redund->rbuf_j    = rbuf_j;
3023     redund->rbuf_a    = rbuf_a;
3024     redund->psubcomm  = NULL;
3025 
3026     redund->Destroy = C->ops->destroy;
3027     C->ops->destroy = MatDestroy_MatRedundant;
3028   }
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
3034 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
3035 {
3036   PetscErrorCode ierr;
3037   MPI_Comm       comm;
3038   PetscMPIInt    size,subsize;
3039   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
3040   Mat_Redundant  *redund =NULL;
3041 
3042   PetscFunctionBegin;
3043   if (subcomm == MPI_COMM_NULL) { /* use psubcomm */
3044     if (reuse ==  MAT_INITIAL_MATRIX) {
3045       if (psubcomm == NULL) { /* create psubcomm */
3046         ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3047         ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3048         ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
3049         ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
3050         ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
3051         ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
3052       }
3053       subcomm = psubcomm->comm;
3054       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
3055     } else { /* retrieve psubcomm */
3056       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
3057       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
3058       if (subsize == 1) {
3059         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3060         psubcomm = c->redundant->psubcomm;
3061       } else {
3062         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3063         psubcomm = c->redundant->psubcomm;
3064       }
3065     }
3066   }
3067 
3068   if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
3069      ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr);
3070   } else {
3071     /* via MatGetSubMatrices() */
3072     Mat  *matseq;
3073     IS   isrow,iscol;
3074 
3075     if (reuse == MAT_INITIAL_MATRIX) {
3076       /* create a local sequential matrix matseq[0] */
3077       mloc_sub = PETSC_DECIDE;
3078       ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
3079       ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
3080       rstart = rend - mloc_sub;
3081       ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
3082       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
3083     } else { /* reuse == MAT_REUSE_MATRIX */
3084       if (subsize == 1) {
3085         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3086         redund = c->redundant;
3087       } else {
3088         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3089         redund = c->redundant;
3090       }
3091 
3092       isrow  = redund->isrow;
3093       iscol  = redund->iscol;
3094       matseq = redund->matseq;
3095     }
3096 
3097     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
3098     ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
3099 
3100     if (reuse == MAT_INITIAL_MATRIX) {
3101       /* create a supporting struct and attach it to C for reuse */
3102       ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
3103       if (subsize == 1) {
3104         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3105         c->redundant = redund;
3106       } else {
3107         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3108         c->redundant = redund;
3109       }
3110       redund->isrow    = isrow;
3111       redund->iscol    = iscol;
3112       redund->matseq   = matseq;
3113       redund->psubcomm = NULL;
3114       redund->Destroy               = (*matredundant)->ops->destroy;
3115       (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
3116     }
3117   }
3118 
3119   if (psubcomm) {  /* free psubcomm in MatDestroy_MatRedundant() */
3120     ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
3121     if (subsize == 1) {
3122       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
3123       c->redundant->psubcomm = psubcomm;
3124     } else {
3125       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
3126       c->redundant->psubcomm = psubcomm ;
3127     }
3128   }
3129   PetscFunctionReturn(0);
3130 }
3131 
3132 #undef __FUNCT__
3133 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3134 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3135 {
3136   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3137   PetscErrorCode ierr;
3138   PetscInt       i,*idxb = 0;
3139   PetscScalar    *va,*vb;
3140   Vec            vtmp;
3141 
3142   PetscFunctionBegin;
3143   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3144   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3145   if (idx) {
3146     for (i=0; i<A->rmap->n; i++) {
3147       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3148     }
3149   }
3150 
3151   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3152   if (idx) {
3153     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3154   }
3155   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3156   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3157 
3158   for (i=0; i<A->rmap->n; i++) {
3159     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3160       va[i] = vb[i];
3161       if (idx) idx[i] = a->garray[idxb[i]];
3162     }
3163   }
3164 
3165   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3166   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3167   ierr = PetscFree(idxb);CHKERRQ(ierr);
3168   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3169   PetscFunctionReturn(0);
3170 }
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3174 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3175 {
3176   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3177   PetscErrorCode ierr;
3178   PetscInt       i,*idxb = 0;
3179   PetscScalar    *va,*vb;
3180   Vec            vtmp;
3181 
3182   PetscFunctionBegin;
3183   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3184   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3185   if (idx) {
3186     for (i=0; i<A->cmap->n; i++) {
3187       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3188     }
3189   }
3190 
3191   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3192   if (idx) {
3193     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3194   }
3195   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3196   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3197 
3198   for (i=0; i<A->rmap->n; i++) {
3199     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3200       va[i] = vb[i];
3201       if (idx) idx[i] = a->garray[idxb[i]];
3202     }
3203   }
3204 
3205   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3206   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3207   ierr = PetscFree(idxb);CHKERRQ(ierr);
3208   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3214 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3215 {
3216   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3217   PetscInt       n      = A->rmap->n;
3218   PetscInt       cstart = A->cmap->rstart;
3219   PetscInt       *cmap  = mat->garray;
3220   PetscInt       *diagIdx, *offdiagIdx;
3221   Vec            diagV, offdiagV;
3222   PetscScalar    *a, *diagA, *offdiagA;
3223   PetscInt       r;
3224   PetscErrorCode ierr;
3225 
3226   PetscFunctionBegin;
3227   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3228   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3229   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3230   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3231   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3232   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3233   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3234   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3235   for (r = 0; r < n; ++r) {
3236     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3237       a[r]   = diagA[r];
3238       idx[r] = cstart + diagIdx[r];
3239     } else {
3240       a[r]   = offdiagA[r];
3241       idx[r] = cmap[offdiagIdx[r]];
3242     }
3243   }
3244   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3245   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3246   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3247   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3248   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3249   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3255 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3256 {
3257   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3258   PetscInt       n      = A->rmap->n;
3259   PetscInt       cstart = A->cmap->rstart;
3260   PetscInt       *cmap  = mat->garray;
3261   PetscInt       *diagIdx, *offdiagIdx;
3262   Vec            diagV, offdiagV;
3263   PetscScalar    *a, *diagA, *offdiagA;
3264   PetscInt       r;
3265   PetscErrorCode ierr;
3266 
3267   PetscFunctionBegin;
3268   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3269   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3270   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3271   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3272   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3273   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3274   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3275   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3276   for (r = 0; r < n; ++r) {
3277     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3278       a[r]   = diagA[r];
3279       idx[r] = cstart + diagIdx[r];
3280     } else {
3281       a[r]   = offdiagA[r];
3282       idx[r] = cmap[offdiagIdx[r]];
3283     }
3284   }
3285   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3286   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3287   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3288   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3289   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3290   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3291   PetscFunctionReturn(0);
3292 }
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3296 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3297 {
3298   PetscErrorCode ierr;
3299   Mat            *dummy;
3300 
3301   PetscFunctionBegin;
3302   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3303   *newmat = *dummy;
3304   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3305   PetscFunctionReturn(0);
3306 }
3307 
3308 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3309 
3310 #undef __FUNCT__
3311 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3312 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3313 {
3314   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3315   PetscErrorCode ierr;
3316 
3317   PetscFunctionBegin;
3318   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3319   PetscFunctionReturn(0);
3320 }
3321 
3322 #undef __FUNCT__
3323 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3324 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3325 {
3326   PetscErrorCode ierr;
3327   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3328 
3329   PetscFunctionBegin;
3330   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3331   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3332   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3333   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 /* -------------------------------------------------------------------*/
3338 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3339                                        MatGetRow_MPIAIJ,
3340                                        MatRestoreRow_MPIAIJ,
3341                                        MatMult_MPIAIJ,
3342                                 /* 4*/ MatMultAdd_MPIAIJ,
3343                                        MatMultTranspose_MPIAIJ,
3344                                        MatMultTransposeAdd_MPIAIJ,
3345 #if defined(PETSC_HAVE_PBGL)
3346                                        MatSolve_MPIAIJ,
3347 #else
3348                                        0,
3349 #endif
3350                                        0,
3351                                        0,
3352                                 /*10*/ 0,
3353                                        0,
3354                                        0,
3355                                        MatSOR_MPIAIJ,
3356                                        MatTranspose_MPIAIJ,
3357                                 /*15*/ MatGetInfo_MPIAIJ,
3358                                        MatEqual_MPIAIJ,
3359                                        MatGetDiagonal_MPIAIJ,
3360                                        MatDiagonalScale_MPIAIJ,
3361                                        MatNorm_MPIAIJ,
3362                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3363                                        MatAssemblyEnd_MPIAIJ,
3364                                        MatSetOption_MPIAIJ,
3365                                        MatZeroEntries_MPIAIJ,
3366                                 /*24*/ MatZeroRows_MPIAIJ,
3367                                        0,
3368 #if defined(PETSC_HAVE_PBGL)
3369                                        0,
3370 #else
3371                                        0,
3372 #endif
3373                                        0,
3374                                        0,
3375                                 /*29*/ MatSetUp_MPIAIJ,
3376 #if defined(PETSC_HAVE_PBGL)
3377                                        0,
3378 #else
3379                                        0,
3380 #endif
3381                                        0,
3382                                        0,
3383                                        0,
3384                                 /*34*/ MatDuplicate_MPIAIJ,
3385                                        0,
3386                                        0,
3387                                        0,
3388                                        0,
3389                                 /*39*/ MatAXPY_MPIAIJ,
3390                                        MatGetSubMatrices_MPIAIJ,
3391                                        MatIncreaseOverlap_MPIAIJ,
3392                                        MatGetValues_MPIAIJ,
3393                                        MatCopy_MPIAIJ,
3394                                 /*44*/ MatGetRowMax_MPIAIJ,
3395                                        MatScale_MPIAIJ,
3396                                        0,
3397                                        0,
3398                                        MatZeroRowsColumns_MPIAIJ,
3399                                 /*49*/ MatSetRandom_MPIAIJ,
3400                                        0,
3401                                        0,
3402                                        0,
3403                                        0,
3404                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3405                                        0,
3406                                        MatSetUnfactored_MPIAIJ,
3407                                        MatPermute_MPIAIJ,
3408                                        0,
3409                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3410                                        MatDestroy_MPIAIJ,
3411                                        MatView_MPIAIJ,
3412                                        0,
3413                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3414                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3415                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3416                                        0,
3417                                        0,
3418                                        0,
3419                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3420                                        MatGetRowMinAbs_MPIAIJ,
3421                                        0,
3422                                        MatSetColoring_MPIAIJ,
3423                                        0,
3424                                        MatSetValuesAdifor_MPIAIJ,
3425                                 /*75*/ MatFDColoringApply_AIJ,
3426                                        0,
3427                                        0,
3428                                        0,
3429                                        MatFindZeroDiagonals_MPIAIJ,
3430                                 /*80*/ 0,
3431                                        0,
3432                                        0,
3433                                 /*83*/ MatLoad_MPIAIJ,
3434                                        0,
3435                                        0,
3436                                        0,
3437                                        0,
3438                                        0,
3439                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3440                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3441                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3442                                        MatPtAP_MPIAIJ_MPIAIJ,
3443                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3444                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3445                                        0,
3446                                        0,
3447                                        0,
3448                                        0,
3449                                 /*99*/ 0,
3450                                        0,
3451                                        0,
3452                                        MatConjugate_MPIAIJ,
3453                                        0,
3454                                 /*104*/MatSetValuesRow_MPIAIJ,
3455                                        MatRealPart_MPIAIJ,
3456                                        MatImaginaryPart_MPIAIJ,
3457                                        0,
3458                                        0,
3459                                 /*109*/0,
3460                                        MatGetRedundantMatrix_MPIAIJ,
3461                                        MatGetRowMin_MPIAIJ,
3462                                        0,
3463                                        0,
3464                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3465                                        0,
3466                                        0,
3467                                        0,
3468                                        0,
3469                                 /*119*/0,
3470                                        0,
3471                                        0,
3472                                        0,
3473                                        MatGetMultiProcBlock_MPIAIJ,
3474                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3475                                        MatGetColumnNorms_MPIAIJ,
3476                                        MatInvertBlockDiagonal_MPIAIJ,
3477                                        0,
3478                                        MatGetSubMatricesParallel_MPIAIJ,
3479                                 /*129*/0,
3480                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3481                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3482                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3483                                        0,
3484                                 /*134*/0,
3485                                        0,
3486                                        0,
3487                                        0,
3488                                        0,
3489                                 /*139*/0,
3490                                        0
3491 };
3492 
3493 /* ----------------------------------------------------------------------------------------*/
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3497 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3498 {
3499   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3500   PetscErrorCode ierr;
3501 
3502   PetscFunctionBegin;
3503   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3504   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3510 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3511 {
3512   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3513   PetscErrorCode ierr;
3514 
3515   PetscFunctionBegin;
3516   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3517   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3523 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3524 {
3525   Mat_MPIAIJ     *b;
3526   PetscErrorCode ierr;
3527 
3528   PetscFunctionBegin;
3529   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3530   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3531   b = (Mat_MPIAIJ*)B->data;
3532 
3533   if (!B->preallocated) {
3534     /* Explicitly create 2 MATSEQAIJ matrices. */
3535     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3536     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3537     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3538     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3539     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3540     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3541     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3542     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3543     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3544     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3545   }
3546 
3547   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3548   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3549   B->preallocated = PETSC_TRUE;
3550   PetscFunctionReturn(0);
3551 }
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3555 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3556 {
3557   Mat            mat;
3558   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3559   PetscErrorCode ierr;
3560 
3561   PetscFunctionBegin;
3562   *newmat = 0;
3563   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3564   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3565   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3566   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3567   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3568   a       = (Mat_MPIAIJ*)mat->data;
3569 
3570   mat->factortype   = matin->factortype;
3571   mat->rmap->bs     = matin->rmap->bs;
3572   mat->cmap->bs     = matin->cmap->bs;
3573   mat->assembled    = PETSC_TRUE;
3574   mat->insertmode   = NOT_SET_VALUES;
3575   mat->preallocated = PETSC_TRUE;
3576 
3577   a->size         = oldmat->size;
3578   a->rank         = oldmat->rank;
3579   a->donotstash   = oldmat->donotstash;
3580   a->roworiented  = oldmat->roworiented;
3581   a->rowindices   = 0;
3582   a->rowvalues    = 0;
3583   a->getrowactive = PETSC_FALSE;
3584 
3585   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3586   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3587 
3588   if (oldmat->colmap) {
3589 #if defined(PETSC_USE_CTABLE)
3590     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3591 #else
3592     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3593     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3594     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3595 #endif
3596   } else a->colmap = 0;
3597   if (oldmat->garray) {
3598     PetscInt len;
3599     len  = oldmat->B->cmap->n;
3600     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3601     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3602     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3603   } else a->garray = 0;
3604 
3605   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3606   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3607   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3608   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3609   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3610   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3611   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3612   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3613   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3614   *newmat = mat;
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 
3619 
3620 #undef __FUNCT__
3621 #define __FUNCT__ "MatLoad_MPIAIJ"
3622 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3623 {
3624   PetscScalar    *vals,*svals;
3625   MPI_Comm       comm;
3626   PetscErrorCode ierr;
3627   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3628   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3629   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3630   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3631   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3632   int            fd;
3633   PetscInt       bs = 1;
3634 
3635   PetscFunctionBegin;
3636   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3637   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3638   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3639   if (!rank) {
3640     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3641     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3642     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3643   }
3644 
3645   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3646   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3647   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3648 
3649   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3650 
3651   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3652   M    = header[1]; N = header[2];
3653   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3654   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3655   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3656 
3657   /* If global sizes are set, check if they are consistent with that given in the file */
3658   if (sizesset) {
3659     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3660   }
3661   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3662   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3663 
3664   /* determine ownership of all (block) rows */
3665   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3666   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3667   else m = newMat->rmap->n; /* Set by user */
3668 
3669   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3670   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3671 
3672   /* First process needs enough room for process with most rows */
3673   if (!rank) {
3674     mmax = rowners[1];
3675     for (i=2; i<=size; i++) {
3676       mmax = PetscMax(mmax, rowners[i]);
3677     }
3678   } else mmax = -1;             /* unused, but compilers complain */
3679 
3680   rowners[0] = 0;
3681   for (i=2; i<=size; i++) {
3682     rowners[i] += rowners[i-1];
3683   }
3684   rstart = rowners[rank];
3685   rend   = rowners[rank+1];
3686 
3687   /* distribute row lengths to all processors */
3688   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3689   if (!rank) {
3690     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3691     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3692     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3693     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3694     for (j=0; j<m; j++) {
3695       procsnz[0] += ourlens[j];
3696     }
3697     for (i=1; i<size; i++) {
3698       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3699       /* calculate the number of nonzeros on each processor */
3700       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3701         procsnz[i] += rowlengths[j];
3702       }
3703       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3704     }
3705     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3706   } else {
3707     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3708   }
3709 
3710   if (!rank) {
3711     /* determine max buffer needed and allocate it */
3712     maxnz = 0;
3713     for (i=0; i<size; i++) {
3714       maxnz = PetscMax(maxnz,procsnz[i]);
3715     }
3716     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3717 
3718     /* read in my part of the matrix column indices  */
3719     nz   = procsnz[0];
3720     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3721     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3722 
3723     /* read in every one elses and ship off */
3724     for (i=1; i<size; i++) {
3725       nz   = procsnz[i];
3726       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3727       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3728     }
3729     ierr = PetscFree(cols);CHKERRQ(ierr);
3730   } else {
3731     /* determine buffer space needed for message */
3732     nz = 0;
3733     for (i=0; i<m; i++) {
3734       nz += ourlens[i];
3735     }
3736     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3737 
3738     /* receive message of column indices*/
3739     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3740   }
3741 
3742   /* determine column ownership if matrix is not square */
3743   if (N != M) {
3744     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3745     else n = newMat->cmap->n;
3746     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3747     cstart = cend - n;
3748   } else {
3749     cstart = rstart;
3750     cend   = rend;
3751     n      = cend - cstart;
3752   }
3753 
3754   /* loop over local rows, determining number of off diagonal entries */
3755   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3756   jj   = 0;
3757   for (i=0; i<m; i++) {
3758     for (j=0; j<ourlens[i]; j++) {
3759       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3760       jj++;
3761     }
3762   }
3763 
3764   for (i=0; i<m; i++) {
3765     ourlens[i] -= offlens[i];
3766   }
3767   if (!sizesset) {
3768     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3769   }
3770 
3771   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3772 
3773   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3774 
3775   for (i=0; i<m; i++) {
3776     ourlens[i] += offlens[i];
3777   }
3778 
3779   if (!rank) {
3780     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3781 
3782     /* read in my part of the matrix numerical values  */
3783     nz   = procsnz[0];
3784     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3785 
3786     /* insert into matrix */
3787     jj      = rstart;
3788     smycols = mycols;
3789     svals   = vals;
3790     for (i=0; i<m; i++) {
3791       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3792       smycols += ourlens[i];
3793       svals   += ourlens[i];
3794       jj++;
3795     }
3796 
3797     /* read in other processors and ship out */
3798     for (i=1; i<size; i++) {
3799       nz   = procsnz[i];
3800       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3801       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3802     }
3803     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3804   } else {
3805     /* receive numeric values */
3806     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3807 
3808     /* receive message of values*/
3809     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3810 
3811     /* insert into matrix */
3812     jj      = rstart;
3813     smycols = mycols;
3814     svals   = vals;
3815     for (i=0; i<m; i++) {
3816       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3817       smycols += ourlens[i];
3818       svals   += ourlens[i];
3819       jj++;
3820     }
3821   }
3822   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3823   ierr = PetscFree(vals);CHKERRQ(ierr);
3824   ierr = PetscFree(mycols);CHKERRQ(ierr);
3825   ierr = PetscFree(rowners);CHKERRQ(ierr);
3826   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3827   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3833 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3834 {
3835   PetscErrorCode ierr;
3836   IS             iscol_local;
3837   PetscInt       csize;
3838 
3839   PetscFunctionBegin;
3840   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3841   if (call == MAT_REUSE_MATRIX) {
3842     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3843     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3844   } else {
3845     PetscInt cbs;
3846     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3847     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3848     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3849   }
3850   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3851   if (call == MAT_INITIAL_MATRIX) {
3852     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3853     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3854   }
3855   PetscFunctionReturn(0);
3856 }
3857 
3858 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3859 #undef __FUNCT__
3860 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3861 /*
3862     Not great since it makes two copies of the submatrix, first an SeqAIJ
3863   in local and then by concatenating the local matrices the end result.
3864   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3865 
3866   Note: This requires a sequential iscol with all indices.
3867 */
3868 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3869 {
3870   PetscErrorCode ierr;
3871   PetscMPIInt    rank,size;
3872   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3873   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3874   PetscBool      allcolumns, colflag;
3875   Mat            M,Mreuse;
3876   MatScalar      *vwork,*aa;
3877   MPI_Comm       comm;
3878   Mat_SeqAIJ     *aij;
3879 
3880   PetscFunctionBegin;
3881   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3882   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3884 
3885   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3886   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3887   if (colflag && ncol == mat->cmap->N) {
3888     allcolumns = PETSC_TRUE;
3889   } else {
3890     allcolumns = PETSC_FALSE;
3891   }
3892   if (call ==  MAT_REUSE_MATRIX) {
3893     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3894     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3895     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3896   } else {
3897     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3898   }
3899 
3900   /*
3901       m - number of local rows
3902       n - number of columns (same on all processors)
3903       rstart - first row in new global matrix generated
3904   */
3905   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3906   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3907   if (call == MAT_INITIAL_MATRIX) {
3908     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3909     ii  = aij->i;
3910     jj  = aij->j;
3911 
3912     /*
3913         Determine the number of non-zeros in the diagonal and off-diagonal
3914         portions of the matrix in order to do correct preallocation
3915     */
3916 
3917     /* first get start and end of "diagonal" columns */
3918     if (csize == PETSC_DECIDE) {
3919       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3920       if (mglobal == n) { /* square matrix */
3921         nlocal = m;
3922       } else {
3923         nlocal = n/size + ((n % size) > rank);
3924       }
3925     } else {
3926       nlocal = csize;
3927     }
3928     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3929     rstart = rend - nlocal;
3930     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3931 
3932     /* next, compute all the lengths */
3933     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3934     olens = dlens + m;
3935     for (i=0; i<m; i++) {
3936       jend = ii[i+1] - ii[i];
3937       olen = 0;
3938       dlen = 0;
3939       for (j=0; j<jend; j++) {
3940         if (*jj < rstart || *jj >= rend) olen++;
3941         else dlen++;
3942         jj++;
3943       }
3944       olens[i] = olen;
3945       dlens[i] = dlen;
3946     }
3947     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3948     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3949     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3950     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3951     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3952     ierr = PetscFree(dlens);CHKERRQ(ierr);
3953   } else {
3954     PetscInt ml,nl;
3955 
3956     M    = *newmat;
3957     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3958     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3959     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3960     /*
3961          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3962        rather than the slower MatSetValues().
3963     */
3964     M->was_assembled = PETSC_TRUE;
3965     M->assembled     = PETSC_FALSE;
3966   }
3967   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3968   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3969   ii   = aij->i;
3970   jj   = aij->j;
3971   aa   = aij->a;
3972   for (i=0; i<m; i++) {
3973     row   = rstart + i;
3974     nz    = ii[i+1] - ii[i];
3975     cwork = jj;     jj += nz;
3976     vwork = aa;     aa += nz;
3977     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3978   }
3979 
3980   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3981   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3982   *newmat = M;
3983 
3984   /* save submatrix used in processor for next request */
3985   if (call ==  MAT_INITIAL_MATRIX) {
3986     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3987     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3988   }
3989   PetscFunctionReturn(0);
3990 }
3991 
3992 #undef __FUNCT__
3993 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3994 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3995 {
3996   PetscInt       m,cstart, cend,j,nnz,i,d;
3997   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3998   const PetscInt *JJ;
3999   PetscScalar    *values;
4000   PetscErrorCode ierr;
4001 
4002   PetscFunctionBegin;
4003   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
4004 
4005   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
4006   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
4007   m      = B->rmap->n;
4008   cstart = B->cmap->rstart;
4009   cend   = B->cmap->rend;
4010   rstart = B->rmap->rstart;
4011 
4012   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
4013 
4014 #if defined(PETSC_USE_DEBUGGING)
4015   for (i=0; i<m; i++) {
4016     nnz = Ii[i+1]- Ii[i];
4017     JJ  = J + Ii[i];
4018     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4019     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
4020     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4021   }
4022 #endif
4023 
4024   for (i=0; i<m; i++) {
4025     nnz     = Ii[i+1]- Ii[i];
4026     JJ      = J + Ii[i];
4027     nnz_max = PetscMax(nnz_max,nnz);
4028     d       = 0;
4029     for (j=0; j<nnz; j++) {
4030       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4031     }
4032     d_nnz[i] = d;
4033     o_nnz[i] = nnz - d;
4034   }
4035   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4036   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4037 
4038   if (v) values = (PetscScalar*)v;
4039   else {
4040     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
4041     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
4042   }
4043 
4044   for (i=0; i<m; i++) {
4045     ii   = i + rstart;
4046     nnz  = Ii[i+1]- Ii[i];
4047     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
4048   }
4049   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4050   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4051 
4052   if (!v) {
4053     ierr = PetscFree(values);CHKERRQ(ierr);
4054   }
4055   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4056   PetscFunctionReturn(0);
4057 }
4058 
4059 #undef __FUNCT__
4060 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
4061 /*@
4062    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4063    (the default parallel PETSc format).
4064 
4065    Collective on MPI_Comm
4066 
4067    Input Parameters:
4068 +  B - the matrix
4069 .  i - the indices into j for the start of each local row (starts with zero)
4070 .  j - the column indices for each local row (starts with zero)
4071 -  v - optional values in the matrix
4072 
4073    Level: developer
4074 
4075    Notes:
4076        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4077      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4078      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4079 
4080        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4081 
4082        The format which is used for the sparse matrix input, is equivalent to a
4083     row-major ordering.. i.e for the following matrix, the input data expected is
4084     as shown:
4085 
4086         1 0 0
4087         2 0 3     P0
4088        -------
4089         4 5 6     P1
4090 
4091      Process0 [P0]: rows_owned=[0,1]
4092         i =  {0,1,3}  [size = nrow+1  = 2+1]
4093         j =  {0,0,2}  [size = nz = 6]
4094         v =  {1,2,3}  [size = nz = 6]
4095 
4096      Process1 [P1]: rows_owned=[2]
4097         i =  {0,3}    [size = nrow+1  = 1+1]
4098         j =  {0,1,2}  [size = nz = 6]
4099         v =  {4,5,6}  [size = nz = 6]
4100 
4101 .keywords: matrix, aij, compressed row, sparse, parallel
4102 
4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4104           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4105 @*/
4106 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4107 {
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4112   PetscFunctionReturn(0);
4113 }
4114 
4115 #undef __FUNCT__
4116 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4117 /*@C
4118    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4119    (the default parallel PETSc format).  For good matrix assembly performance
4120    the user should preallocate the matrix storage by setting the parameters
4121    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4122    performance can be increased by more than a factor of 50.
4123 
4124    Collective on MPI_Comm
4125 
4126    Input Parameters:
4127 +  A - the matrix
4128 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4129            (same value is used for all local rows)
4130 .  d_nnz - array containing the number of nonzeros in the various rows of the
4131            DIAGONAL portion of the local submatrix (possibly different for each row)
4132            or NULL, if d_nz is used to specify the nonzero structure.
4133            The size of this array is equal to the number of local rows, i.e 'm'.
4134            For matrices that will be factored, you must leave room for (and set)
4135            the diagonal entry even if it is zero.
4136 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4137            submatrix (same value is used for all local rows).
4138 -  o_nnz - array containing the number of nonzeros in the various rows of the
4139            OFF-DIAGONAL portion of the local submatrix (possibly different for
4140            each row) or NULL, if o_nz is used to specify the nonzero
4141            structure. The size of this array is equal to the number
4142            of local rows, i.e 'm'.
4143 
4144    If the *_nnz parameter is given then the *_nz parameter is ignored
4145 
4146    The AIJ format (also called the Yale sparse matrix format or
4147    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4148    storage.  The stored row and column indices begin with zero.
4149    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4150 
4151    The parallel matrix is partitioned such that the first m0 rows belong to
4152    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4153    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4154 
4155    The DIAGONAL portion of the local submatrix of a processor can be defined
4156    as the submatrix which is obtained by extraction the part corresponding to
4157    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4158    first row that belongs to the processor, r2 is the last row belonging to
4159    the this processor, and c1-c2 is range of indices of the local part of a
4160    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4161    common case of a square matrix, the row and column ranges are the same and
4162    the DIAGONAL part is also square. The remaining portion of the local
4163    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4164 
4165    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4166 
4167    You can call MatGetInfo() to get information on how effective the preallocation was;
4168    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4169    You can also run with the option -info and look for messages with the string
4170    malloc in them to see if additional memory allocation was needed.
4171 
4172    Example usage:
4173 
4174    Consider the following 8x8 matrix with 34 non-zero values, that is
4175    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4176    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4177    as follows:
4178 
4179 .vb
4180             1  2  0  |  0  3  0  |  0  4
4181     Proc0   0  5  6  |  7  0  0  |  8  0
4182             9  0 10  | 11  0  0  | 12  0
4183     -------------------------------------
4184            13  0 14  | 15 16 17  |  0  0
4185     Proc1   0 18  0  | 19 20 21  |  0  0
4186             0  0  0  | 22 23  0  | 24  0
4187     -------------------------------------
4188     Proc2  25 26 27  |  0  0 28  | 29  0
4189            30  0  0  | 31 32 33  |  0 34
4190 .ve
4191 
4192    This can be represented as a collection of submatrices as:
4193 
4194 .vb
4195       A B C
4196       D E F
4197       G H I
4198 .ve
4199 
4200    Where the submatrices A,B,C are owned by proc0, D,E,F are
4201    owned by proc1, G,H,I are owned by proc2.
4202 
4203    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4204    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4205    The 'M','N' parameters are 8,8, and have the same values on all procs.
4206 
4207    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4208    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4209    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4210    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4211    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4212    matrix, ans [DF] as another SeqAIJ matrix.
4213 
4214    When d_nz, o_nz parameters are specified, d_nz storage elements are
4215    allocated for every row of the local diagonal submatrix, and o_nz
4216    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4217    One way to choose d_nz and o_nz is to use the max nonzerors per local
4218    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4219    In this case, the values of d_nz,o_nz are:
4220 .vb
4221      proc0 : dnz = 2, o_nz = 2
4222      proc1 : dnz = 3, o_nz = 2
4223      proc2 : dnz = 1, o_nz = 4
4224 .ve
4225    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4226    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4227    for proc3. i.e we are using 12+15+10=37 storage locations to store
4228    34 values.
4229 
4230    When d_nnz, o_nnz parameters are specified, the storage is specified
4231    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4232    In the above case the values for d_nnz,o_nnz are:
4233 .vb
4234      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4235      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4236      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4237 .ve
4238    Here the space allocated is sum of all the above values i.e 34, and
4239    hence pre-allocation is perfect.
4240 
4241    Level: intermediate
4242 
4243 .keywords: matrix, aij, compressed row, sparse, parallel
4244 
4245 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4246           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4247 @*/
4248 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4249 {
4250   PetscErrorCode ierr;
4251 
4252   PetscFunctionBegin;
4253   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4254   PetscValidType(B,1);
4255   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4256   PetscFunctionReturn(0);
4257 }
4258 
4259 #undef __FUNCT__
4260 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4261 /*@
4262      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4263          CSR format the local rows.
4264 
4265    Collective on MPI_Comm
4266 
4267    Input Parameters:
4268 +  comm - MPI communicator
4269 .  m - number of local rows (Cannot be PETSC_DECIDE)
4270 .  n - This value should be the same as the local size used in creating the
4271        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4272        calculated if N is given) For square matrices n is almost always m.
4273 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4274 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4275 .   i - row indices
4276 .   j - column indices
4277 -   a - matrix values
4278 
4279    Output Parameter:
4280 .   mat - the matrix
4281 
4282    Level: intermediate
4283 
4284    Notes:
4285        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4286      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4287      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4288 
4289        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4290 
4291        The format which is used for the sparse matrix input, is equivalent to a
4292     row-major ordering.. i.e for the following matrix, the input data expected is
4293     as shown:
4294 
4295         1 0 0
4296         2 0 3     P0
4297        -------
4298         4 5 6     P1
4299 
4300      Process0 [P0]: rows_owned=[0,1]
4301         i =  {0,1,3}  [size = nrow+1  = 2+1]
4302         j =  {0,0,2}  [size = nz = 6]
4303         v =  {1,2,3}  [size = nz = 6]
4304 
4305      Process1 [P1]: rows_owned=[2]
4306         i =  {0,3}    [size = nrow+1  = 1+1]
4307         j =  {0,1,2}  [size = nz = 6]
4308         v =  {4,5,6}  [size = nz = 6]
4309 
4310 .keywords: matrix, aij, compressed row, sparse, parallel
4311 
4312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4313           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4314 @*/
4315 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4316 {
4317   PetscErrorCode ierr;
4318 
4319   PetscFunctionBegin;
4320   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4321   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4322   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4323   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4324   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4325   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4326   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 #undef __FUNCT__
4331 #define __FUNCT__ "MatCreateAIJ"
4332 /*@C
4333    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4334    (the default parallel PETSc format).  For good matrix assembly performance
4335    the user should preallocate the matrix storage by setting the parameters
4336    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4337    performance can be increased by more than a factor of 50.
4338 
4339    Collective on MPI_Comm
4340 
4341    Input Parameters:
4342 +  comm - MPI communicator
4343 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4344            This value should be the same as the local size used in creating the
4345            y vector for the matrix-vector product y = Ax.
4346 .  n - This value should be the same as the local size used in creating the
4347        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4348        calculated if N is given) For square matrices n is almost always m.
4349 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4350 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4351 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4352            (same value is used for all local rows)
4353 .  d_nnz - array containing the number of nonzeros in the various rows of the
4354            DIAGONAL portion of the local submatrix (possibly different for each row)
4355            or NULL, if d_nz is used to specify the nonzero structure.
4356            The size of this array is equal to the number of local rows, i.e 'm'.
4357 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4358            submatrix (same value is used for all local rows).
4359 -  o_nnz - array containing the number of nonzeros in the various rows of the
4360            OFF-DIAGONAL portion of the local submatrix (possibly different for
4361            each row) or NULL, if o_nz is used to specify the nonzero
4362            structure. The size of this array is equal to the number
4363            of local rows, i.e 'm'.
4364 
4365    Output Parameter:
4366 .  A - the matrix
4367 
4368    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4369    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4370    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4371 
4372    Notes:
4373    If the *_nnz parameter is given then the *_nz parameter is ignored
4374 
4375    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4376    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4377    storage requirements for this matrix.
4378 
4379    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4380    processor than it must be used on all processors that share the object for
4381    that argument.
4382 
4383    The user MUST specify either the local or global matrix dimensions
4384    (possibly both).
4385 
4386    The parallel matrix is partitioned across processors such that the
4387    first m0 rows belong to process 0, the next m1 rows belong to
4388    process 1, the next m2 rows belong to process 2 etc.. where
4389    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4390    values corresponding to [m x N] submatrix.
4391 
4392    The columns are logically partitioned with the n0 columns belonging
4393    to 0th partition, the next n1 columns belonging to the next
4394    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4395 
4396    The DIAGONAL portion of the local submatrix on any given processor
4397    is the submatrix corresponding to the rows and columns m,n
4398    corresponding to the given processor. i.e diagonal matrix on
4399    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4400    etc. The remaining portion of the local submatrix [m x (N-n)]
4401    constitute the OFF-DIAGONAL portion. The example below better
4402    illustrates this concept.
4403 
4404    For a square global matrix we define each processor's diagonal portion
4405    to be its local rows and the corresponding columns (a square submatrix);
4406    each processor's off-diagonal portion encompasses the remainder of the
4407    local matrix (a rectangular submatrix).
4408 
4409    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4410 
4411    When calling this routine with a single process communicator, a matrix of
4412    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4413    type of communicator, use the construction mechanism:
4414      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4415 
4416    By default, this format uses inodes (identical nodes) when possible.
4417    We search for consecutive rows with the same nonzero structure, thereby
4418    reusing matrix information to achieve increased efficiency.
4419 
4420    Options Database Keys:
4421 +  -mat_no_inode  - Do not use inodes
4422 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4423 -  -mat_aij_oneindex - Internally use indexing starting at 1
4424         rather than 0.  Note that when calling MatSetValues(),
4425         the user still MUST index entries starting at 0!
4426 
4427 
4428    Example usage:
4429 
4430    Consider the following 8x8 matrix with 34 non-zero values, that is
4431    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4432    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4433    as follows:
4434 
4435 .vb
4436             1  2  0  |  0  3  0  |  0  4
4437     Proc0   0  5  6  |  7  0  0  |  8  0
4438             9  0 10  | 11  0  0  | 12  0
4439     -------------------------------------
4440            13  0 14  | 15 16 17  |  0  0
4441     Proc1   0 18  0  | 19 20 21  |  0  0
4442             0  0  0  | 22 23  0  | 24  0
4443     -------------------------------------
4444     Proc2  25 26 27  |  0  0 28  | 29  0
4445            30  0  0  | 31 32 33  |  0 34
4446 .ve
4447 
4448    This can be represented as a collection of submatrices as:
4449 
4450 .vb
4451       A B C
4452       D E F
4453       G H I
4454 .ve
4455 
4456    Where the submatrices A,B,C are owned by proc0, D,E,F are
4457    owned by proc1, G,H,I are owned by proc2.
4458 
4459    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4460    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4461    The 'M','N' parameters are 8,8, and have the same values on all procs.
4462 
4463    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4464    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4465    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4466    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4467    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4468    matrix, ans [DF] as another SeqAIJ matrix.
4469 
4470    When d_nz, o_nz parameters are specified, d_nz storage elements are
4471    allocated for every row of the local diagonal submatrix, and o_nz
4472    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4473    One way to choose d_nz and o_nz is to use the max nonzerors per local
4474    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4475    In this case, the values of d_nz,o_nz are:
4476 .vb
4477      proc0 : dnz = 2, o_nz = 2
4478      proc1 : dnz = 3, o_nz = 2
4479      proc2 : dnz = 1, o_nz = 4
4480 .ve
4481    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4482    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4483    for proc3. i.e we are using 12+15+10=37 storage locations to store
4484    34 values.
4485 
4486    When d_nnz, o_nnz parameters are specified, the storage is specified
4487    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4488    In the above case the values for d_nnz,o_nnz are:
4489 .vb
4490      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4491      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4492      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4493 .ve
4494    Here the space allocated is sum of all the above values i.e 34, and
4495    hence pre-allocation is perfect.
4496 
4497    Level: intermediate
4498 
4499 .keywords: matrix, aij, compressed row, sparse, parallel
4500 
4501 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4502           MPIAIJ, MatCreateMPIAIJWithArrays()
4503 @*/
4504 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4505 {
4506   PetscErrorCode ierr;
4507   PetscMPIInt    size;
4508 
4509   PetscFunctionBegin;
4510   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4511   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4513   if (size > 1) {
4514     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4515     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4516   } else {
4517     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4518     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4519   }
4520   PetscFunctionReturn(0);
4521 }
4522 
4523 #undef __FUNCT__
4524 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4525 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4526 {
4527   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4528 
4529   PetscFunctionBegin;
4530   *Ad     = a->A;
4531   *Ao     = a->B;
4532   *colmap = a->garray;
4533   PetscFunctionReturn(0);
4534 }
4535 
4536 #undef __FUNCT__
4537 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4538 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4539 {
4540   PetscErrorCode ierr;
4541   PetscInt       i;
4542   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4543 
4544   PetscFunctionBegin;
4545   if (coloring->ctype == IS_COLORING_GLOBAL) {
4546     ISColoringValue *allcolors,*colors;
4547     ISColoring      ocoloring;
4548 
4549     /* set coloring for diagonal portion */
4550     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4551 
4552     /* set coloring for off-diagonal portion */
4553     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4554     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4555     for (i=0; i<a->B->cmap->n; i++) {
4556       colors[i] = allcolors[a->garray[i]];
4557     }
4558     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4559     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4560     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4561     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4562   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4563     ISColoringValue *colors;
4564     PetscInt        *larray;
4565     ISColoring      ocoloring;
4566 
4567     /* set coloring for diagonal portion */
4568     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4569     for (i=0; i<a->A->cmap->n; i++) {
4570       larray[i] = i + A->cmap->rstart;
4571     }
4572     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4573     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4574     for (i=0; i<a->A->cmap->n; i++) {
4575       colors[i] = coloring->colors[larray[i]];
4576     }
4577     ierr = PetscFree(larray);CHKERRQ(ierr);
4578     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4579     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4580     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4581 
4582     /* set coloring for off-diagonal portion */
4583     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4584     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4585     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4586     for (i=0; i<a->B->cmap->n; i++) {
4587       colors[i] = coloring->colors[larray[i]];
4588     }
4589     ierr = PetscFree(larray);CHKERRQ(ierr);
4590     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4591     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4592     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4593   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4599 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4600 {
4601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4602   PetscErrorCode ierr;
4603 
4604   PetscFunctionBegin;
4605   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4606   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4607   PetscFunctionReturn(0);
4608 }
4609 
4610 #undef __FUNCT__
4611 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4612 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4613 {
4614   PetscErrorCode ierr;
4615   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4616   PetscInt       *indx;
4617 
4618   PetscFunctionBegin;
4619   /* This routine will ONLY return MPIAIJ type matrix */
4620   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4621   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4622   if (n == PETSC_DECIDE) {
4623     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4624   }
4625   /* Check sum(n) = N */
4626   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4627   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4628 
4629   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4630   rstart -= m;
4631 
4632   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4633   for (i=0; i<m; i++) {
4634     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4635     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4636     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4637   }
4638 
4639   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4640   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4641   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4642   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4643   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4644   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 #undef __FUNCT__
4649 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4650 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4651 {
4652   PetscErrorCode ierr;
4653   PetscInt       m,N,i,rstart,nnz,Ii;
4654   PetscInt       *indx;
4655   PetscScalar    *values;
4656 
4657   PetscFunctionBegin;
4658   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4659   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4660   for (i=0; i<m; i++) {
4661     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4662     Ii   = i + rstart;
4663     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4664     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4665   }
4666   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4667   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4668   PetscFunctionReturn(0);
4669 }
4670 
4671 #undef __FUNCT__
4672 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4673 /*@
4674       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4675                  matrices from each processor
4676 
4677     Collective on MPI_Comm
4678 
4679    Input Parameters:
4680 +    comm - the communicators the parallel matrix will live on
4681 .    inmat - the input sequential matrices
4682 .    n - number of local columns (or PETSC_DECIDE)
4683 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4684 
4685    Output Parameter:
4686 .    outmat - the parallel matrix generated
4687 
4688     Level: advanced
4689 
4690    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4691 
4692 @*/
4693 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4694 {
4695   PetscErrorCode ierr;
4696 
4697   PetscFunctionBegin;
4698   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4699   if (scall == MAT_INITIAL_MATRIX) {
4700     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4701   }
4702   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4703   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4704   PetscFunctionReturn(0);
4705 }
4706 
4707 #undef __FUNCT__
4708 #define __FUNCT__ "MatFileSplit"
4709 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4710 {
4711   PetscErrorCode    ierr;
4712   PetscMPIInt       rank;
4713   PetscInt          m,N,i,rstart,nnz;
4714   size_t            len;
4715   const PetscInt    *indx;
4716   PetscViewer       out;
4717   char              *name;
4718   Mat               B;
4719   const PetscScalar *values;
4720 
4721   PetscFunctionBegin;
4722   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4723   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4724   /* Should this be the type of the diagonal block of A? */
4725   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4726   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4727   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4728   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4729   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4730   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4731   for (i=0; i<m; i++) {
4732     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4733     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4734     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4735   }
4736   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4737   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4738 
4739   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4740   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4741   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4742   sprintf(name,"%s.%d",outfile,rank);
4743   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4744   ierr = PetscFree(name);CHKERRQ(ierr);
4745   ierr = MatView(B,out);CHKERRQ(ierr);
4746   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4747   ierr = MatDestroy(&B);CHKERRQ(ierr);
4748   PetscFunctionReturn(0);
4749 }
4750 
4751 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4752 #undef __FUNCT__
4753 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4754 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4755 {
4756   PetscErrorCode      ierr;
4757   Mat_Merge_SeqsToMPI *merge;
4758   PetscContainer      container;
4759 
4760   PetscFunctionBegin;
4761   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4762   if (container) {
4763     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4764     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4765     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4766     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4767     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4768     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4769     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4770     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4771     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4772     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4773     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4774     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4775     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4776     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4777     ierr = PetscFree(merge);CHKERRQ(ierr);
4778     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4779   }
4780   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4781   PetscFunctionReturn(0);
4782 }
4783 
4784 #include <../src/mat/utils/freespace.h>
4785 #include <petscbt.h>
4786 
4787 #undef __FUNCT__
4788 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4789 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4790 {
4791   PetscErrorCode      ierr;
4792   MPI_Comm            comm;
4793   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4794   PetscMPIInt         size,rank,taga,*len_s;
4795   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4796   PetscInt            proc,m;
4797   PetscInt            **buf_ri,**buf_rj;
4798   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4799   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4800   MPI_Request         *s_waits,*r_waits;
4801   MPI_Status          *status;
4802   MatScalar           *aa=a->a;
4803   MatScalar           **abuf_r,*ba_i;
4804   Mat_Merge_SeqsToMPI *merge;
4805   PetscContainer      container;
4806 
4807   PetscFunctionBegin;
4808   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4809   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4810 
4811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4812   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4813 
4814   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4815   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4816 
4817   bi     = merge->bi;
4818   bj     = merge->bj;
4819   buf_ri = merge->buf_ri;
4820   buf_rj = merge->buf_rj;
4821 
4822   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4823   owners = merge->rowmap->range;
4824   len_s  = merge->len_s;
4825 
4826   /* send and recv matrix values */
4827   /*-----------------------------*/
4828   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4829   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4830 
4831   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4832   for (proc=0,k=0; proc<size; proc++) {
4833     if (!len_s[proc]) continue;
4834     i    = owners[proc];
4835     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4836     k++;
4837   }
4838 
4839   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4840   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4841   ierr = PetscFree(status);CHKERRQ(ierr);
4842 
4843   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4844   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4845 
4846   /* insert mat values of mpimat */
4847   /*----------------------------*/
4848   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4849   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4850 
4851   for (k=0; k<merge->nrecv; k++) {
4852     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4853     nrows       = *(buf_ri_k[k]);
4854     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4855     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4856   }
4857 
4858   /* set values of ba */
4859   m = merge->rowmap->n;
4860   for (i=0; i<m; i++) {
4861     arow = owners[rank] + i;
4862     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4863     bnzi = bi[i+1] - bi[i];
4864     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4865 
4866     /* add local non-zero vals of this proc's seqmat into ba */
4867     anzi   = ai[arow+1] - ai[arow];
4868     aj     = a->j + ai[arow];
4869     aa     = a->a + ai[arow];
4870     nextaj = 0;
4871     for (j=0; nextaj<anzi; j++) {
4872       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4873         ba_i[j] += aa[nextaj++];
4874       }
4875     }
4876 
4877     /* add received vals into ba */
4878     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4879       /* i-th row */
4880       if (i == *nextrow[k]) {
4881         anzi   = *(nextai[k]+1) - *nextai[k];
4882         aj     = buf_rj[k] + *(nextai[k]);
4883         aa     = abuf_r[k] + *(nextai[k]);
4884         nextaj = 0;
4885         for (j=0; nextaj<anzi; j++) {
4886           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4887             ba_i[j] += aa[nextaj++];
4888           }
4889         }
4890         nextrow[k]++; nextai[k]++;
4891       }
4892     }
4893     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4894   }
4895   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4896   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4897 
4898   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4899   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4900   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4901   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4902   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4903   PetscFunctionReturn(0);
4904 }
4905 
4906 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4907 
4908 #undef __FUNCT__
4909 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4910 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4911 {
4912   PetscErrorCode      ierr;
4913   Mat                 B_mpi;
4914   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4915   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4916   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4917   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4918   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4919   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4920   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4921   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4922   MPI_Status          *status;
4923   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4924   PetscBT             lnkbt;
4925   Mat_Merge_SeqsToMPI *merge;
4926   PetscContainer      container;
4927 
4928   PetscFunctionBegin;
4929   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4930 
4931   /* make sure it is a PETSc comm */
4932   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4933   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4934   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4935 
4936   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4937   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4938 
4939   /* determine row ownership */
4940   /*---------------------------------------------------------*/
4941   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4942   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4943   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4944   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4945   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4946   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4947   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4948 
4949   m      = merge->rowmap->n;
4950   owners = merge->rowmap->range;
4951 
4952   /* determine the number of messages to send, their lengths */
4953   /*---------------------------------------------------------*/
4954   len_s = merge->len_s;
4955 
4956   len          = 0; /* length of buf_si[] */
4957   merge->nsend = 0;
4958   for (proc=0; proc<size; proc++) {
4959     len_si[proc] = 0;
4960     if (proc == rank) {
4961       len_s[proc] = 0;
4962     } else {
4963       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4964       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4965     }
4966     if (len_s[proc]) {
4967       merge->nsend++;
4968       nrows = 0;
4969       for (i=owners[proc]; i<owners[proc+1]; i++) {
4970         if (ai[i+1] > ai[i]) nrows++;
4971       }
4972       len_si[proc] = 2*(nrows+1);
4973       len         += len_si[proc];
4974     }
4975   }
4976 
4977   /* determine the number and length of messages to receive for ij-structure */
4978   /*-------------------------------------------------------------------------*/
4979   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4980   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4981 
4982   /* post the Irecv of j-structure */
4983   /*-------------------------------*/
4984   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4985   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4986 
4987   /* post the Isend of j-structure */
4988   /*--------------------------------*/
4989   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4990 
4991   for (proc=0, k=0; proc<size; proc++) {
4992     if (!len_s[proc]) continue;
4993     i    = owners[proc];
4994     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4995     k++;
4996   }
4997 
4998   /* receives and sends of j-structure are complete */
4999   /*------------------------------------------------*/
5000   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
5001   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
5002 
5003   /* send and recv i-structure */
5004   /*---------------------------*/
5005   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
5006   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
5007 
5008   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
5009   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
5010   for (proc=0,k=0; proc<size; proc++) {
5011     if (!len_s[proc]) continue;
5012     /* form outgoing message for i-structure:
5013          buf_si[0]:                 nrows to be sent
5014                [1:nrows]:           row index (global)
5015                [nrows+1:2*nrows+1]: i-structure index
5016     */
5017     /*-------------------------------------------*/
5018     nrows       = len_si[proc]/2 - 1;
5019     buf_si_i    = buf_si + nrows+1;
5020     buf_si[0]   = nrows;
5021     buf_si_i[0] = 0;
5022     nrows       = 0;
5023     for (i=owners[proc]; i<owners[proc+1]; i++) {
5024       anzi = ai[i+1] - ai[i];
5025       if (anzi) {
5026         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
5027         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
5028         nrows++;
5029       }
5030     }
5031     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
5032     k++;
5033     buf_si += len_si[proc];
5034   }
5035 
5036   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
5037   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
5038 
5039   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
5040   for (i=0; i<merge->nrecv; i++) {
5041     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
5042   }
5043 
5044   ierr = PetscFree(len_si);CHKERRQ(ierr);
5045   ierr = PetscFree(len_ri);CHKERRQ(ierr);
5046   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
5047   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
5048   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
5049   ierr = PetscFree(buf_s);CHKERRQ(ierr);
5050   ierr = PetscFree(status);CHKERRQ(ierr);
5051 
5052   /* compute a local seq matrix in each processor */
5053   /*----------------------------------------------*/
5054   /* allocate bi array and free space for accumulating nonzero column info */
5055   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
5056   bi[0] = 0;
5057 
5058   /* create and initialize a linked list */
5059   nlnk = N+1;
5060   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5061 
5062   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5063   len  = ai[owners[rank+1]] - ai[owners[rank]];
5064   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5065 
5066   current_space = free_space;
5067 
5068   /* determine symbolic info for each local row */
5069   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5070 
5071   for (k=0; k<merge->nrecv; k++) {
5072     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5073     nrows       = *buf_ri_k[k];
5074     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5075     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5076   }
5077 
5078   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5079   len  = 0;
5080   for (i=0; i<m; i++) {
5081     bnzi = 0;
5082     /* add local non-zero cols of this proc's seqmat into lnk */
5083     arow  = owners[rank] + i;
5084     anzi  = ai[arow+1] - ai[arow];
5085     aj    = a->j + ai[arow];
5086     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5087     bnzi += nlnk;
5088     /* add received col data into lnk */
5089     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5090       if (i == *nextrow[k]) { /* i-th row */
5091         anzi  = *(nextai[k]+1) - *nextai[k];
5092         aj    = buf_rj[k] + *nextai[k];
5093         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5094         bnzi += nlnk;
5095         nextrow[k]++; nextai[k]++;
5096       }
5097     }
5098     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5099 
5100     /* if free space is not available, make more free space */
5101     if (current_space->local_remaining<bnzi) {
5102       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5103       nspacedouble++;
5104     }
5105     /* copy data into free space, then initialize lnk */
5106     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5107     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5108 
5109     current_space->array           += bnzi;
5110     current_space->local_used      += bnzi;
5111     current_space->local_remaining -= bnzi;
5112 
5113     bi[i+1] = bi[i] + bnzi;
5114   }
5115 
5116   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5117 
5118   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5119   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5120   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5121 
5122   /* create symbolic parallel matrix B_mpi */
5123   /*---------------------------------------*/
5124   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5125   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5126   if (n==PETSC_DECIDE) {
5127     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5128   } else {
5129     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5130   }
5131   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5132   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5133   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5134   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5135   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5136 
5137   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5138   B_mpi->assembled    = PETSC_FALSE;
5139   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5140   merge->bi           = bi;
5141   merge->bj           = bj;
5142   merge->buf_ri       = buf_ri;
5143   merge->buf_rj       = buf_rj;
5144   merge->coi          = NULL;
5145   merge->coj          = NULL;
5146   merge->owners_co    = NULL;
5147 
5148   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5149 
5150   /* attach the supporting struct to B_mpi for reuse */
5151   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5152   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5153   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5154   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5155   *mpimat = B_mpi;
5156 
5157   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5158   PetscFunctionReturn(0);
5159 }
5160 
5161 #undef __FUNCT__
5162 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5163 /*@C
5164       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5165                  matrices from each processor
5166 
5167     Collective on MPI_Comm
5168 
5169    Input Parameters:
5170 +    comm - the communicators the parallel matrix will live on
5171 .    seqmat - the input sequential matrices
5172 .    m - number of local rows (or PETSC_DECIDE)
5173 .    n - number of local columns (or PETSC_DECIDE)
5174 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5175 
5176    Output Parameter:
5177 .    mpimat - the parallel matrix generated
5178 
5179     Level: advanced
5180 
5181    Notes:
5182      The dimensions of the sequential matrix in each processor MUST be the same.
5183      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5184      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5185 @*/
5186 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5187 {
5188   PetscErrorCode ierr;
5189   PetscMPIInt    size;
5190 
5191   PetscFunctionBegin;
5192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5193   if (size == 1) {
5194     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5195     if (scall == MAT_INITIAL_MATRIX) {
5196       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5197     } else {
5198       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5199     }
5200     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5201     PetscFunctionReturn(0);
5202   }
5203   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5204   if (scall == MAT_INITIAL_MATRIX) {
5205     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5206   }
5207   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5208   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5209   PetscFunctionReturn(0);
5210 }
5211 
5212 #undef __FUNCT__
5213 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5214 /*@
5215      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5216           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5217           with MatGetSize()
5218 
5219     Not Collective
5220 
5221    Input Parameters:
5222 +    A - the matrix
5223 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5224 
5225    Output Parameter:
5226 .    A_loc - the local sequential matrix generated
5227 
5228     Level: developer
5229 
5230 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5231 
5232 @*/
5233 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5234 {
5235   PetscErrorCode ierr;
5236   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5237   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5238   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5239   MatScalar      *aa=a->a,*ba=b->a,*cam;
5240   PetscScalar    *ca;
5241   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5242   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5243   PetscBool      match;
5244 
5245   PetscFunctionBegin;
5246   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5247   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5248   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5249   if (scall == MAT_INITIAL_MATRIX) {
5250     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5251     ci[0] = 0;
5252     for (i=0; i<am; i++) {
5253       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5254     }
5255     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5256     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5257     k    = 0;
5258     for (i=0; i<am; i++) {
5259       ncols_o = bi[i+1] - bi[i];
5260       ncols_d = ai[i+1] - ai[i];
5261       /* off-diagonal portion of A */
5262       for (jo=0; jo<ncols_o; jo++) {
5263         col = cmap[*bj];
5264         if (col >= cstart) break;
5265         cj[k]   = col; bj++;
5266         ca[k++] = *ba++;
5267       }
5268       /* diagonal portion of A */
5269       for (j=0; j<ncols_d; j++) {
5270         cj[k]   = cstart + *aj++;
5271         ca[k++] = *aa++;
5272       }
5273       /* off-diagonal portion of A */
5274       for (j=jo; j<ncols_o; j++) {
5275         cj[k]   = cmap[*bj++];
5276         ca[k++] = *ba++;
5277       }
5278     }
5279     /* put together the new matrix */
5280     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5281     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5282     /* Since these are PETSc arrays, change flags to free them as necessary. */
5283     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5284     mat->free_a  = PETSC_TRUE;
5285     mat->free_ij = PETSC_TRUE;
5286     mat->nonew   = 0;
5287   } else if (scall == MAT_REUSE_MATRIX) {
5288     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5289     ci = mat->i; cj = mat->j; cam = mat->a;
5290     for (i=0; i<am; i++) {
5291       /* off-diagonal portion of A */
5292       ncols_o = bi[i+1] - bi[i];
5293       for (jo=0; jo<ncols_o; jo++) {
5294         col = cmap[*bj];
5295         if (col >= cstart) break;
5296         *cam++ = *ba++; bj++;
5297       }
5298       /* diagonal portion of A */
5299       ncols_d = ai[i+1] - ai[i];
5300       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5301       /* off-diagonal portion of A */
5302       for (j=jo; j<ncols_o; j++) {
5303         *cam++ = *ba++; bj++;
5304       }
5305     }
5306   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5307   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5308   PetscFunctionReturn(0);
5309 }
5310 
5311 #undef __FUNCT__
5312 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5313 /*@C
5314      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5315 
5316     Not Collective
5317 
5318    Input Parameters:
5319 +    A - the matrix
5320 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5321 -    row, col - index sets of rows and columns to extract (or NULL)
5322 
5323    Output Parameter:
5324 .    A_loc - the local sequential matrix generated
5325 
5326     Level: developer
5327 
5328 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5329 
5330 @*/
5331 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5332 {
5333   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5334   PetscErrorCode ierr;
5335   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5336   IS             isrowa,iscola;
5337   Mat            *aloc;
5338   PetscBool      match;
5339 
5340   PetscFunctionBegin;
5341   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5342   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5343   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5344   if (!row) {
5345     start = A->rmap->rstart; end = A->rmap->rend;
5346     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5347   } else {
5348     isrowa = *row;
5349   }
5350   if (!col) {
5351     start = A->cmap->rstart;
5352     cmap  = a->garray;
5353     nzA   = a->A->cmap->n;
5354     nzB   = a->B->cmap->n;
5355     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5356     ncols = 0;
5357     for (i=0; i<nzB; i++) {
5358       if (cmap[i] < start) idx[ncols++] = cmap[i];
5359       else break;
5360     }
5361     imark = i;
5362     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5363     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5364     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5365   } else {
5366     iscola = *col;
5367   }
5368   if (scall != MAT_INITIAL_MATRIX) {
5369     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5370     aloc[0] = *A_loc;
5371   }
5372   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5373   *A_loc = aloc[0];
5374   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5375   if (!row) {
5376     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5377   }
5378   if (!col) {
5379     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5380   }
5381   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5382   PetscFunctionReturn(0);
5383 }
5384 
5385 #undef __FUNCT__
5386 #define __FUNCT__ "MatGetBrowsOfAcols"
5387 /*@C
5388     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5389 
5390     Collective on Mat
5391 
5392    Input Parameters:
5393 +    A,B - the matrices in mpiaij format
5394 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5395 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5396 
5397    Output Parameter:
5398 +    rowb, colb - index sets of rows and columns of B to extract
5399 -    B_seq - the sequential matrix generated
5400 
5401     Level: developer
5402 
5403 @*/
5404 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5405 {
5406   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5407   PetscErrorCode ierr;
5408   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5409   IS             isrowb,iscolb;
5410   Mat            *bseq=NULL;
5411 
5412   PetscFunctionBegin;
5413   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5414     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5415   }
5416   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5417 
5418   if (scall == MAT_INITIAL_MATRIX) {
5419     start = A->cmap->rstart;
5420     cmap  = a->garray;
5421     nzA   = a->A->cmap->n;
5422     nzB   = a->B->cmap->n;
5423     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5424     ncols = 0;
5425     for (i=0; i<nzB; i++) {  /* row < local row index */
5426       if (cmap[i] < start) idx[ncols++] = cmap[i];
5427       else break;
5428     }
5429     imark = i;
5430     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5431     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5432     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5433     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5434   } else {
5435     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5436     isrowb  = *rowb; iscolb = *colb;
5437     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5438     bseq[0] = *B_seq;
5439   }
5440   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5441   *B_seq = bseq[0];
5442   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5443   if (!rowb) {
5444     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5445   } else {
5446     *rowb = isrowb;
5447   }
5448   if (!colb) {
5449     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5450   } else {
5451     *colb = iscolb;
5452   }
5453   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5454   PetscFunctionReturn(0);
5455 }
5456 
5457 #undef __FUNCT__
5458 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5459 /*
5460     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5461     of the OFF-DIAGONAL portion of local A
5462 
5463     Collective on Mat
5464 
5465    Input Parameters:
5466 +    A,B - the matrices in mpiaij format
5467 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5468 
5469    Output Parameter:
5470 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5471 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5472 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5473 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5474 
5475     Level: developer
5476 
5477 */
5478 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5479 {
5480   VecScatter_MPI_General *gen_to,*gen_from;
5481   PetscErrorCode         ierr;
5482   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5483   Mat_SeqAIJ             *b_oth;
5484   VecScatter             ctx =a->Mvctx;
5485   MPI_Comm               comm;
5486   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5487   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5488   PetscScalar            *rvalues,*svalues;
5489   MatScalar              *b_otha,*bufa,*bufA;
5490   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5491   MPI_Request            *rwaits = NULL,*swaits = NULL;
5492   MPI_Status             *sstatus,rstatus;
5493   PetscMPIInt            jj;
5494   PetscInt               *cols,sbs,rbs;
5495   PetscScalar            *vals;
5496 
5497   PetscFunctionBegin;
5498   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5499   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5500     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5501   }
5502   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5503   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5504 
5505   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5506   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5507   rvalues  = gen_from->values; /* holds the length of receiving row */
5508   svalues  = gen_to->values;   /* holds the length of sending row */
5509   nrecvs   = gen_from->n;
5510   nsends   = gen_to->n;
5511 
5512   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5513   srow    = gen_to->indices;    /* local row index to be sent */
5514   sstarts = gen_to->starts;
5515   sprocs  = gen_to->procs;
5516   sstatus = gen_to->sstatus;
5517   sbs     = gen_to->bs;
5518   rstarts = gen_from->starts;
5519   rprocs  = gen_from->procs;
5520   rbs     = gen_from->bs;
5521 
5522   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5523   if (scall == MAT_INITIAL_MATRIX) {
5524     /* i-array */
5525     /*---------*/
5526     /*  post receives */
5527     for (i=0; i<nrecvs; i++) {
5528       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5529       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5530       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5531     }
5532 
5533     /* pack the outgoing message */
5534     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5535 
5536     sstartsj[0] = 0;
5537     rstartsj[0] = 0;
5538     len         = 0; /* total length of j or a array to be sent */
5539     k           = 0;
5540     for (i=0; i<nsends; i++) {
5541       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5542       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5543       for (j=0; j<nrows; j++) {
5544         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5545         for (l=0; l<sbs; l++) {
5546           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5547 
5548           rowlen[j*sbs+l] = ncols;
5549 
5550           len += ncols;
5551           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5552         }
5553         k++;
5554       }
5555       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5556 
5557       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5558     }
5559     /* recvs and sends of i-array are completed */
5560     i = nrecvs;
5561     while (i--) {
5562       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5563     }
5564     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5565 
5566     /* allocate buffers for sending j and a arrays */
5567     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5568     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5569 
5570     /* create i-array of B_oth */
5571     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5572 
5573     b_othi[0] = 0;
5574     len       = 0; /* total length of j or a array to be received */
5575     k         = 0;
5576     for (i=0; i<nrecvs; i++) {
5577       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5578       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5579       for (j=0; j<nrows; j++) {
5580         b_othi[k+1] = b_othi[k] + rowlen[j];
5581         len        += rowlen[j]; k++;
5582       }
5583       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5584     }
5585 
5586     /* allocate space for j and a arrrays of B_oth */
5587     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5588     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5589 
5590     /* j-array */
5591     /*---------*/
5592     /*  post receives of j-array */
5593     for (i=0; i<nrecvs; i++) {
5594       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5595       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5596     }
5597 
5598     /* pack the outgoing message j-array */
5599     k = 0;
5600     for (i=0; i<nsends; i++) {
5601       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5602       bufJ  = bufj+sstartsj[i];
5603       for (j=0; j<nrows; j++) {
5604         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5605         for (ll=0; ll<sbs; ll++) {
5606           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5607           for (l=0; l<ncols; l++) {
5608             *bufJ++ = cols[l];
5609           }
5610           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5611         }
5612       }
5613       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5614     }
5615 
5616     /* recvs and sends of j-array are completed */
5617     i = nrecvs;
5618     while (i--) {
5619       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5620     }
5621     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5622   } else if (scall == MAT_REUSE_MATRIX) {
5623     sstartsj = *startsj_s;
5624     rstartsj = *startsj_r;
5625     bufa     = *bufa_ptr;
5626     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5627     b_otha   = b_oth->a;
5628   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5629 
5630   /* a-array */
5631   /*---------*/
5632   /*  post receives of a-array */
5633   for (i=0; i<nrecvs; i++) {
5634     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5635     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5636   }
5637 
5638   /* pack the outgoing message a-array */
5639   k = 0;
5640   for (i=0; i<nsends; i++) {
5641     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5642     bufA  = bufa+sstartsj[i];
5643     for (j=0; j<nrows; j++) {
5644       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5645       for (ll=0; ll<sbs; ll++) {
5646         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5647         for (l=0; l<ncols; l++) {
5648           *bufA++ = vals[l];
5649         }
5650         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5651       }
5652     }
5653     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5654   }
5655   /* recvs and sends of a-array are completed */
5656   i = nrecvs;
5657   while (i--) {
5658     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5659   }
5660   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5661   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5662 
5663   if (scall == MAT_INITIAL_MATRIX) {
5664     /* put together the new matrix */
5665     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5666 
5667     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5668     /* Since these are PETSc arrays, change flags to free them as necessary. */
5669     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5670     b_oth->free_a  = PETSC_TRUE;
5671     b_oth->free_ij = PETSC_TRUE;
5672     b_oth->nonew   = 0;
5673 
5674     ierr = PetscFree(bufj);CHKERRQ(ierr);
5675     if (!startsj_s || !bufa_ptr) {
5676       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5677       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5678     } else {
5679       *startsj_s = sstartsj;
5680       *startsj_r = rstartsj;
5681       *bufa_ptr  = bufa;
5682     }
5683   }
5684   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5685   PetscFunctionReturn(0);
5686 }
5687 
5688 #undef __FUNCT__
5689 #define __FUNCT__ "MatGetCommunicationStructs"
5690 /*@C
5691   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5692 
5693   Not Collective
5694 
5695   Input Parameters:
5696 . A - The matrix in mpiaij format
5697 
5698   Output Parameter:
5699 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5700 . colmap - A map from global column index to local index into lvec
5701 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5702 
5703   Level: developer
5704 
5705 @*/
5706 #if defined(PETSC_USE_CTABLE)
5707 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5708 #else
5709 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5710 #endif
5711 {
5712   Mat_MPIAIJ *a;
5713 
5714   PetscFunctionBegin;
5715   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5716   PetscValidPointer(lvec, 2);
5717   PetscValidPointer(colmap, 3);
5718   PetscValidPointer(multScatter, 4);
5719   a = (Mat_MPIAIJ*) A->data;
5720   if (lvec) *lvec = a->lvec;
5721   if (colmap) *colmap = a->colmap;
5722   if (multScatter) *multScatter = a->Mvctx;
5723   PetscFunctionReturn(0);
5724 }
5725 
5726 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5727 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5728 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5729 
5730 #undef __FUNCT__
5731 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5732 /*
5733     Computes (B'*A')' since computing B*A directly is untenable
5734 
5735                n                       p                          p
5736         (              )       (              )         (                  )
5737       m (      A       )  *  n (       B      )   =   m (         C        )
5738         (              )       (              )         (                  )
5739 
5740 */
5741 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5742 {
5743   PetscErrorCode ierr;
5744   Mat            At,Bt,Ct;
5745 
5746   PetscFunctionBegin;
5747   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5748   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5749   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5750   ierr = MatDestroy(&At);CHKERRQ(ierr);
5751   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5752   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5753   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5754   PetscFunctionReturn(0);
5755 }
5756 
5757 #undef __FUNCT__
5758 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5759 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5760 {
5761   PetscErrorCode ierr;
5762   PetscInt       m=A->rmap->n,n=B->cmap->n;
5763   Mat            Cmat;
5764 
5765   PetscFunctionBegin;
5766   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5767   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5768   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5769   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5770   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5771   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5772   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774 
5775   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5776 
5777   *C = Cmat;
5778   PetscFunctionReturn(0);
5779 }
5780 
5781 /* ----------------------------------------------------------------*/
5782 #undef __FUNCT__
5783 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5784 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5785 {
5786   PetscErrorCode ierr;
5787 
5788   PetscFunctionBegin;
5789   if (scall == MAT_INITIAL_MATRIX) {
5790     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5791     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5792     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5793   }
5794   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5795   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5796   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5797   PetscFunctionReturn(0);
5798 }
5799 
5800 #if defined(PETSC_HAVE_MUMPS)
5801 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5802 #endif
5803 #if defined(PETSC_HAVE_PASTIX)
5804 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5805 #endif
5806 #if defined(PETSC_HAVE_SUPERLU_DIST)
5807 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5808 #endif
5809 #if defined(PETSC_HAVE_CLIQUE)
5810 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5811 #endif
5812 
5813 /*MC
5814    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5815 
5816    Options Database Keys:
5817 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5818 
5819   Level: beginner
5820 
5821 .seealso: MatCreateAIJ()
5822 M*/
5823 
5824 #undef __FUNCT__
5825 #define __FUNCT__ "MatCreate_MPIAIJ"
5826 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5827 {
5828   Mat_MPIAIJ     *b;
5829   PetscErrorCode ierr;
5830   PetscMPIInt    size;
5831 
5832   PetscFunctionBegin;
5833   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5834 
5835   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5836   B->data       = (void*)b;
5837   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5838   B->assembled  = PETSC_FALSE;
5839   B->insertmode = NOT_SET_VALUES;
5840   b->size       = size;
5841 
5842   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5843 
5844   /* build cache for off array entries formed */
5845   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5846 
5847   b->donotstash  = PETSC_FALSE;
5848   b->colmap      = 0;
5849   b->garray      = 0;
5850   b->roworiented = PETSC_TRUE;
5851 
5852   /* stuff used for matrix vector multiply */
5853   b->lvec  = NULL;
5854   b->Mvctx = NULL;
5855 
5856   /* stuff for MatGetRow() */
5857   b->rowindices   = 0;
5858   b->rowvalues    = 0;
5859   b->getrowactive = PETSC_FALSE;
5860 
5861   /* flexible pointer used in CUSP/CUSPARSE classes */
5862   b->spptr = NULL;
5863 
5864 #if defined(PETSC_HAVE_MUMPS)
5865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5866 #endif
5867 #if defined(PETSC_HAVE_PASTIX)
5868   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5869 #endif
5870 #if defined(PETSC_HAVE_SUPERLU_DIST)
5871   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5872 #endif
5873 #if defined(PETSC_HAVE_CLIQUE)
5874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5875 #endif
5876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5889   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5890   PetscFunctionReturn(0);
5891 }
5892 
5893 #undef __FUNCT__
5894 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5895 /*@
5896      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5897          and "off-diagonal" part of the matrix in CSR format.
5898 
5899    Collective on MPI_Comm
5900 
5901    Input Parameters:
5902 +  comm - MPI communicator
5903 .  m - number of local rows (Cannot be PETSC_DECIDE)
5904 .  n - This value should be the same as the local size used in creating the
5905        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5906        calculated if N is given) For square matrices n is almost always m.
5907 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5908 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5909 .   i - row indices for "diagonal" portion of matrix
5910 .   j - column indices
5911 .   a - matrix values
5912 .   oi - row indices for "off-diagonal" portion of matrix
5913 .   oj - column indices
5914 -   oa - matrix values
5915 
5916    Output Parameter:
5917 .   mat - the matrix
5918 
5919    Level: advanced
5920 
5921    Notes:
5922        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5923        must free the arrays once the matrix has been destroyed and not before.
5924 
5925        The i and j indices are 0 based
5926 
5927        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5928 
5929        This sets local rows and cannot be used to set off-processor values.
5930 
5931        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5932        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5933        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5934        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5935        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5936        communication if it is known that only local entries will be set.
5937 
5938 .keywords: matrix, aij, compressed row, sparse, parallel
5939 
5940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5941           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5942 @*/
5943 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5944 {
5945   PetscErrorCode ierr;
5946   Mat_MPIAIJ     *maij;
5947 
5948   PetscFunctionBegin;
5949   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5950   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5951   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5952   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5953   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5954   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5955   maij = (Mat_MPIAIJ*) (*mat)->data;
5956 
5957   (*mat)->preallocated = PETSC_TRUE;
5958 
5959   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5960   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5961 
5962   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5963   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5964 
5965   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5966   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5967   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5968   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5969 
5970   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5971   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5972   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5973   PetscFunctionReturn(0);
5974 }
5975 
5976 /*
5977     Special version for direct calls from Fortran
5978 */
5979 #include <petsc-private/fortranimpl.h>
5980 
5981 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5982 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5983 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5984 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5985 #endif
5986 
5987 /* Change these macros so can be used in void function */
5988 #undef CHKERRQ
5989 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5990 #undef SETERRQ2
5991 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5992 #undef SETERRQ3
5993 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5994 #undef SETERRQ
5995 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5996 
5997 #undef __FUNCT__
5998 #define __FUNCT__ "matsetvaluesmpiaij_"
5999 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6000 {
6001   Mat            mat  = *mmat;
6002   PetscInt       m    = *mm, n = *mn;
6003   InsertMode     addv = *maddv;
6004   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6005   PetscScalar    value;
6006   PetscErrorCode ierr;
6007 
6008   MatCheckPreallocated(mat,1);
6009   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6010 
6011 #if defined(PETSC_USE_DEBUG)
6012   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6013 #endif
6014   {
6015     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6016     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6017     PetscBool roworiented = aij->roworiented;
6018 
6019     /* Some Variables required in the macro */
6020     Mat        A                 = aij->A;
6021     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6022     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6023     MatScalar  *aa               = a->a;
6024     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6025     Mat        B                 = aij->B;
6026     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6027     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6028     MatScalar  *ba               = b->a;
6029 
6030     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6031     PetscInt  nonew = a->nonew;
6032     MatScalar *ap1,*ap2;
6033 
6034     PetscFunctionBegin;
6035     for (i=0; i<m; i++) {
6036       if (im[i] < 0) continue;
6037 #if defined(PETSC_USE_DEBUG)
6038       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6039 #endif
6040       if (im[i] >= rstart && im[i] < rend) {
6041         row      = im[i] - rstart;
6042         lastcol1 = -1;
6043         rp1      = aj + ai[row];
6044         ap1      = aa + ai[row];
6045         rmax1    = aimax[row];
6046         nrow1    = ailen[row];
6047         low1     = 0;
6048         high1    = nrow1;
6049         lastcol2 = -1;
6050         rp2      = bj + bi[row];
6051         ap2      = ba + bi[row];
6052         rmax2    = bimax[row];
6053         nrow2    = bilen[row];
6054         low2     = 0;
6055         high2    = nrow2;
6056 
6057         for (j=0; j<n; j++) {
6058           if (roworiented) value = v[i*n+j];
6059           else value = v[i+j*m];
6060           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6061           if (in[j] >= cstart && in[j] < cend) {
6062             col = in[j] - cstart;
6063             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6064           } else if (in[j] < 0) continue;
6065 #if defined(PETSC_USE_DEBUG)
6066           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6067 #endif
6068           else {
6069             if (mat->was_assembled) {
6070               if (!aij->colmap) {
6071                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6072               }
6073 #if defined(PETSC_USE_CTABLE)
6074               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6075               col--;
6076 #else
6077               col = aij->colmap[in[j]] - 1;
6078 #endif
6079               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6080                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6081                 col  =  in[j];
6082                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6083                 B     = aij->B;
6084                 b     = (Mat_SeqAIJ*)B->data;
6085                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6086                 rp2   = bj + bi[row];
6087                 ap2   = ba + bi[row];
6088                 rmax2 = bimax[row];
6089                 nrow2 = bilen[row];
6090                 low2  = 0;
6091                 high2 = nrow2;
6092                 bm    = aij->B->rmap->n;
6093                 ba    = b->a;
6094               }
6095             } else col = in[j];
6096             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6097           }
6098         }
6099       } else if (!aij->donotstash) {
6100         if (roworiented) {
6101           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6102         } else {
6103           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6104         }
6105       }
6106     }
6107   }
6108   PetscFunctionReturnVoid();
6109 }
6110 
6111