xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 904d1e7050d8278cb5f67df70e5edfa4eaab048b)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
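   Example Usage (a minimal sketch; error checking is omitted and the local sizes m and n are
   assumed to have been chosen by the caller - the preallocation call that does not match the
   communicator is ignored, so calling both is always safe):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,10,NULL);
   MatMPIAIJSetPreallocation(A,10,NULL,5,NULL);
.ve
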
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes
23    when enough consecutive rows with the same nonzero pattern exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
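   Example Usage (a sketch; the type is most often selected at runtime with the option above):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetFromOptions(A);
.ve
   and then run with -mat_type aijcrl.
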
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
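  /* the off-diagonal block B stores its (ghost) columns individually, so its column block size is kept at 1 */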
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
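  /* strategy: each process accumulates its contribution to every global column in a length-N work
     array (diagonal-block entries land at cmap->rstart + local column, off-diagonal-block entries
     are mapped through garray), then one MPIU_Allreduce combines the arrays across processes,
     with MPIU_MAX for NORM_INFINITY and MPIU_SUM otherwise */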
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the numbers of diagonal and off-diagonal nonzeros per row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the numbers of diagonal and off-diagonal nonzeros per row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
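
/*
   A usage sketch (hypothetical caller: gseq is a MATSEQAIJ matrix that exists on every process but
   whose entries are read only on rank 0, and mlocal is the caller's choice of locally owned rows):

     Mat      dist;
     PetscInt mlocal = ...;
     MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);
     ... update the numerical values of gseq on rank 0 ...
     MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);
*/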
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
405 a slightly higher hash table lookup cost; without it, it is not scalable (each
406 process has an order-N integer array) but it is fast to access.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
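
/*
   A lookup sketch (mirroring how the colmap is consumed later in this file): both storage schemes
   map a global column gcol to a local off-diagonal column, storing local+1 so that 0 can mean
   "not present", hence the shifts below:

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&col);
       col--;
     #else
       col = aij->colmap[gcol] - 1;
     #endif
*/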
428 
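/*
   The two macros below insert (or add) a single value at (row,col) of the diagonal block A and the
   off-diagonal block B respectively: a few bisection steps narrow the search window within the row,
   a linear scan locates the column, and if the column is absent a new nonzero is created
   (reallocating the row if necessary) by shifting the later entries of the row up by one slot.
*/
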
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,i,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (i=0; i<mat_i[j+1]-mat_i[j]; i++) {
660       col = i + mat_i[j];
661       /* If column is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         aj[diag_so_far++] = mat_j[col] - cstart;
664         dnz++;
665       } else { /* off-diagonal entries */
666         bj[offd_so_far++] = mat_j[col];
667         onz++;
668       }
669     }
670     ailen[j] = dnz;
671     bilen[j] = onz;
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 /*
677     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
678     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
679     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
680 */
681 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[],
682                                                      const PetscInt full_diag_i[],const PetscInt full_offd_i[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
688   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
689   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
690   PetscInt       *ailen = a->ilen,*aj = a->j;
691   PetscInt       *bilen = b->ilen,*bj = b->j;
692   PetscInt       am     = aij->A->rmap->n,i,j;
693   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
694   PetscScalar    *aa = a->a,*ba = b->a;
695 
696   PetscFunctionBegin;
697   /* Iterate over all rows of the matrix */
698   for (j=0; j<am; j++) {
699     dnz_row = onz_row = 0;
700     /*  Iterate over all non-zero columns of the current row */
701     for (i=0; i<mat_i[j+1]-mat_i[j]; i++) {
702       col = i + mat_i[j];
703       rowstart_offd = full_offd_i[j];
704       rowstart_diag = full_diag_i[j];
705       /* If column is in the diagonal */
706       if (mat_j[col] >= cstart && mat_j[col] < cend) {
707         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
708         aa[rowstart_diag+dnz_row] = mat_a[col];
709         dnz_row++;
710       } else { /* off-diagonal entries */
711         bj[rowstart_offd+onz_row] = mat_j[col];
712         ba[rowstart_offd+onz_row] = mat_a[col];
713         onz_row++;
714       }
715     }
716     ailen[j] = dnz_row;
717     bilen[j] = onz_row;
718   }
719   PetscFunctionReturn(0);
720 }
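
/*
   Input sketch for the two CSR copy routines above (a hypothetical 2 x 4 local block whose rows
   hold the values {a00, a02} and {a11, a13}; note that each row's column indices are sorted, as
   required):

     mat_i[] = {0, 2, 4};
     mat_j[] = {0, 2, 1, 3};
     mat_a[] = {a00, a02, a11, a13};
*/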
721 
722 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
723 {
724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
725   PetscErrorCode ierr;
726   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
727   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
728 
729   PetscFunctionBegin;
730   for (i=0; i<m; i++) {
731     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
732     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
733     if (idxm[i] >= rstart && idxm[i] < rend) {
734       row = idxm[i] - rstart;
735       for (j=0; j<n; j++) {
736         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
737         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
738         if (idxn[j] >= cstart && idxn[j] < cend) {
739           col  = idxn[j] - cstart;
740           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
741         } else {
742           if (!aij->colmap) {
743             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
744           }
745 #if defined(PETSC_USE_CTABLE)
746           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
747           col--;
748 #else
749           col = aij->colmap[idxn[j]] - 1;
750 #endif
751           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
752           else {
753             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
754           }
755         }
756       }
757     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
763 
764 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
765 {
766   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
767   PetscErrorCode ierr;
768   PetscInt       nstash,reallocs;
769 
770   PetscFunctionBegin;
771   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
772 
773   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
774   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
775   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
780 {
781   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
782   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
783   PetscErrorCode ierr;
784   PetscMPIInt    n;
785   PetscInt       i,j,rstart,ncols,flg;
786   PetscInt       *row,*col;
787   PetscBool      other_disassembled;
788   PetscScalar    *val;
789 
790   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
791 
792   PetscFunctionBegin;
793   if (!aij->donotstash && !mat->nooffprocentries) {
794     while (1) {
795       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
796       if (!flg) break;
797 
798       for (i=0; i<n; ) {
799         /* Now identify the consecutive vals belonging to the same row */
800         for (j=i,rstart=row[j]; j<n; j++) {
801           if (row[j] != rstart) break;
802         }
803         if (j < n) ncols = j-i;
804         else       ncols = n-i;
805         /* Now assemble all these values with a single function call */
806         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
807 
808         i = j;
809       }
810     }
811     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
812   }
813   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
814   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
815 
816   /* determine if any processor has disassembled; if so, we must
817      also disassemble ourselves, in order that we may reassemble. */
818   /*
819      if nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled thus we can skip this stuff
821   */
822   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
823     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
824     if (mat->was_assembled && !other_disassembled) {
825       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
829     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
830   }
831   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
832   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
833   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
834 
835   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
836 
837   aij->rowvalues = 0;
838 
839   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
840   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
851 {
852   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
853   PetscErrorCode ierr;
854 
855   PetscFunctionBegin;
856   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
857   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
862 {
863   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
864   PetscInt      *lrows;
865   PetscInt       r, len;
866   PetscBool      cong;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   /* get locally owned rows */
871   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
872   /* fix right hand side if needed */
873   if (x && b) {
874     const PetscScalar *xx;
875     PetscScalar       *bb;
876 
877     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
878     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
879     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
880     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
881     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
882   }
883   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
884   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
885   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
886   if ((diag != 0.0) && cong) {
887     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
888   } else if (diag != 0.0) {
889     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
890     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
894     }
895     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897   } else {
898     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
899   }
900   ierr = PetscFree(lrows);CHKERRQ(ierr);
901 
902   /* only change matrix nonzero state if pattern was allowed to be changed */
903   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
904     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
905     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
913   PetscErrorCode    ierr;
914   PetscMPIInt       n = A->rmap->n;
915   PetscInt          i,j,r,m,p = 0,len = 0;
916   PetscInt          *lrows,*owners = A->rmap->range;
917   PetscSFNode       *rrows;
918   PetscSF           sf;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 
926   PetscFunctionBegin;
927   /* Create SF where leaves are input rows and roots are owned rows */
928   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
929   for (r = 0; r < n; ++r) lrows[r] = -1;
930   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
931   for (r = 0; r < N; ++r) {
932     const PetscInt idx   = rows[r];
933     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
934     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
935       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
936     }
937     rrows[r].rank  = p;
938     rrows[r].index = rows[r] - owners[p];
939   }
940   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
941   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
942   /* Collect flags for rows to be zeroed */
943   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
944   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
946   /* Compress and put in row numbers */
947   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
948   /* zero diagonal part of matrix */
949   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
950   /* handle off diagonal part of matrix */
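  /* approach: mark each zeroed local row with a 1 in a global vector, scatter the marks into the
     ghost (lvec) layout, then wipe every entry of B whose ghost column is marked, correcting b by
     the removed contribution when x is supplied */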
951   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
952   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
953   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
954   for (i=0; i<len; i++) bb[lrows[i]] = 1;
955   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
956   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
957   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
959   if (x) {
960     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
963     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
964   }
965   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
966   /* remove zeroed rows of off diagonal matrix */
967   ii = aij->i;
968   for (i=0; i<len; i++) {
969     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
970   }
971   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
972   if (aij->compressedrow.use) {
973     m    = aij->compressedrow.nrows;
974     ii   = aij->compressedrow.i;
975     ridx = aij->compressedrow.rindex;
976     for (i=0; i<m; i++) {
977       n  = ii[i+1] - ii[i];
978       aj = aij->j + ii[i];
979       aa = aij->a + ii[i];
980 
981       for (j=0; j<n; j++) {
982         if (PetscAbsScalar(mask[*aj])) {
983           if (b) bb[*ridx] -= *aa*xx[*aj];
984           *aa = 0.0;
985         }
986         aa++;
987         aj++;
988       }
989       ridx++;
990     }
991   } else { /* do not use compressed row format */
992     m = l->B->rmap->n;
993     for (i=0; i<m; i++) {
994       n  = ii[i+1] - ii[i];
995       aj = aij->j + ii[i];
996       aa = aij->a + ii[i];
997       for (j=0; j<n; j++) {
998         if (PetscAbsScalar(mask[*aj])) {
999           if (b) bb[i] -= *aa*xx[*aj];
1000           *aa = 0.0;
1001         }
1002         aa++;
1003         aj++;
1004       }
1005     }
1006   }
1007   if (x) {
1008     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1009     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1010   }
1011   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1012   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1013   ierr = PetscFree(lrows);CHKERRQ(ierr);
1014 
1015   /* only change matrix nonzero state if pattern was allowed to be changed */
1016   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1017     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1018     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1024 {
1025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1026   PetscErrorCode ierr;
1027   PetscInt       nt;
1028   VecScatter     Mvctx = a->Mvctx;
1029 
1030   PetscFunctionBegin;
1031   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1032   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1033 
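  /* overlap communication and computation: start gathering the off-process entries of xx, apply
     the diagonal block while the messages are in flight, then complete the scatter and fold in
     the off-diagonal block applied to the gathered ghost values */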
1034   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1036   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1052 {
1053   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1054   PetscErrorCode ierr;
1055   VecScatter     Mvctx = a->Mvctx;
1056 
1057   PetscFunctionBegin;
1058   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1059   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1060   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1061   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1067 {
1068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscBool      merged;
1071 
1072   PetscFunctionBegin;
1073   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   if (!merged) {
1077     /* send it on its way */
1078     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079     /* do local part */
1080     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1081     /* receive remote parts: note this assumes the values are not actually */
1082     /* added in yy until the next line */
1083     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1084   } else {
1085     /* do local part */
1086     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1087     /* send it on its way */
1088     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1089     /* values actually were received in the Begin() but we need to call this nop */
1090     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1096 {
1097   MPI_Comm       comm;
1098   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1099   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1100   IS             Me,Notme;
1101   PetscErrorCode ierr;
1102   PetscInt       M,N,first,last,*notme,i;
1103   PetscMPIInt    size;
1104 
1105   PetscFunctionBegin;
1106   /* Easy test: the diagonal blocks must be transposes of each other */
1107   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1108   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1109   if (!*f) PetscFunctionReturn(0);
1110   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1112   if (size == 1) PetscFunctionReturn(0);
1113 
1114   /* Hard test: off-diagonal block. This requires a MatCreateSubMatrices() call. */
1115   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1116   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1117   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1118   for (i=0; i<first; i++) notme[i] = i;
1119   for (i=last; i<M; i++) notme[i-last+first] = i;
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1121   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1122   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1123   Aoff = Aoffs[0];
1124   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1125   Boff = Boffs[0];
1126   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1127   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1128   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1129   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1130   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1131   ierr = PetscFree(notme);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1136 {
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1152   /* send it on its way */
1153   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1156   /* receive remote parts */
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /*
1162   This only works correctly for square matrices where the subblock A->A is the
1163   diagonal block.
1164 */
1165 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1166 {
1167   PetscErrorCode ierr;
1168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1169 
1170   PetscFunctionBegin;
1171   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1172   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1173   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1178 {
1179   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1184   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1189 {
1190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194 #if defined(PETSC_USE_LOG)
1195   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1196 #endif
1197   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1198   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1199   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1200   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1201 #if defined(PETSC_USE_CTABLE)
1202   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1203 #else
1204   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1205 #endif
1206   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1207   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1208   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1209   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1210   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1211   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1212   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1213 
1214   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1223 #if defined(PETSC_HAVE_ELEMENTAL)
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1225 #endif
1226 #if defined(PETSC_HAVE_HYPRE)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1229 #endif
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1236 {
1237   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1238   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1239   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1240   PetscErrorCode ierr;
1241   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1242   int            fd;
1243   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1244   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1245   PetscScalar    *column_values;
1246   PetscInt       message_count,flowcontrolcount;
1247   FILE           *file;
1248 
1249   PetscFunctionBegin;
1250   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1251   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1252   nz   = A->nz + B->nz;
1253   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1254   if (!rank) {
1255     header[0] = MAT_FILE_CLASSID;
1256     header[1] = mat->rmap->N;
1257     header[2] = mat->cmap->N;
1258 
1259     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1261     /* get largest number of rows any processor has */
1262     rlen  = mat->rmap->n;
1263     range = mat->rmap->range;
1264     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1265   } else {
1266     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     rlen = mat->rmap->n;
1268   }
1269 
1270   /* load up the local row counts */
1271   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1272   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1273 
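  /* streaming pattern used throughout this routine: rank 0 writes its own data, then receives and
     writes each other rank's data in turn; the PetscViewerFlowControl calls bound the number of
     in-flight messages so rank 0 is not flooded */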
1274   /* store the row lengths to the file */
1275   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1276   if (!rank) {
1277     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1278     for (i=1; i<size; i++) {
1279       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1280       rlen = range[i+1] - range[i];
1281       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1283     }
1284     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1285   } else {
1286     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1287     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1291 
1292   /* load up the local column indices */
1293   nzmax = nz; /* this processor needs space as large as the largest processor needs */
1294   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1296   cnt   = 0;
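  /* merge each row so its global column indices come out sorted: first the off-diagonal
     entries that lie to the left of the diagonal block, then the diagonal block itself,
     then the remaining off-diagonal entries */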
1297   for (i=0; i<mat->rmap->n; i++) {
1298     for (j=B->i[i]; j<B->i[i+1]; j++) {
1299       if ((col = garray[B->j[j]]) > cstart) break;
1300       column_indices[cnt++] = col;
1301     }
1302     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1303     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1304   }
1305   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1306 
1307   /* store the column indices to the file */
1308   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1309   if (!rank) {
1310     MPI_Status status;
1311     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1312     for (i=1; i<size; i++) {
1313       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1314       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1315       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1316       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1318     }
1319     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1320   } else {
1321     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1322     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1323     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1325   }
1326   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1327 
1328   /* load up the local column values */
1329   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1330   cnt  = 0;
1331   for (i=0; i<mat->rmap->n; i++) {
1332     for (j=B->i[i]; j<B->i[i+1]; j++) {
1333       if (garray[B->j[j]] > cstart) break;
1334       column_values[cnt++] = B->a[j];
1335     }
1336     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1337     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1338   }
1339   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1340 
1341   /* store the column values to the file */
1342   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1343   if (!rank) {
1344     MPI_Status status;
1345     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1346     for (i=1; i<size; i++) {
1347       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1348       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1349       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1350       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1352     }
1353     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1356     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1359   }
1360   ierr = PetscFree(column_values);CHKERRQ(ierr);
1361 
1362   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1363   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1364   PetscFunctionReturn(0);
1365 }
1366 
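/*
   A minimal usage sketch for the binary dump above (hedged; not part of this file,
   and the file name "A.dat" is just an example). MatView() dispatches here for a
   parallel MATMPIAIJ matrix when the viewer is a binary viewer:

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The matrix can be read back with MatLoad(); the -matload_block_size hint written
   to the companion .info file above is consulted at load time.
*/
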
1367 #include <petscdraw.h>
1368 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1369 {
1370   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1371   PetscErrorCode    ierr;
1372   PetscMPIInt       rank = aij->rank,size = aij->size;
1373   PetscBool         isdraw,iascii,isbinary;
1374   PetscViewer       sviewer;
1375   PetscViewerFormat format;
1376 
1377   PetscFunctionBegin;
1378   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1379   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1380   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1381   if (iascii) {
1382     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1383     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1384       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1385       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1386       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1387       for (i=0; i<(PetscInt)size; i++) {
1388         nmax = PetscMax(nmax,nz[i]);
1389         nmin = PetscMin(nmin,nz[i]);
1390         navg += nz[i];
1391       }
1392       ierr = PetscFree(nz);CHKERRQ(ierr);
1393       navg = navg/size;
1394       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1395       PetscFunctionReturn(0);
1396     }
1397     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1398     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1399       MatInfo   info;
1400       PetscInt  *inodes = NULL;
1401 
1402       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1403       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1404       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1405       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1406       if (!inodes) {
1407         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1408                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1409       } else {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       }
1413       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1414       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1415       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1416       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1417       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1418       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1420       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1421       PetscFunctionReturn(0);
1422     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1423       PetscInt inodecount,inodelimit,*inodes;
1424       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1425       if (inodes) {
1426         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1427       } else {
1428         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1429       }
1430       PetscFunctionReturn(0);
1431     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1432       PetscFunctionReturn(0);
1433     }
1434   } else if (isbinary) {
1435     if (size == 1) {
1436       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1437       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1438     } else {
1439       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1440     }
1441     PetscFunctionReturn(0);
1442   } else if (isdraw) {
1443     PetscDraw draw;
1444     PetscBool isnull;
1445     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1446     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1447     if (isnull) PetscFunctionReturn(0);
1448   }
1449 
1450   {
1451     /* assemble the entire matrix onto first processor. */
1452     Mat        A;
1453     Mat_SeqAIJ *Aloc;
1454     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1455     MatScalar  *a;
1456 
1457     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1458     if (!rank) {
1459       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1460     } else {
1461       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1462     }
1463     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1464     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1465     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1466     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1467     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1468 
1469     /* copy over the A part */
1470     Aloc = (Mat_SeqAIJ*)aij->A->data;
1471     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1472     row  = mat->rmap->rstart;
1473     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1474     for (i=0; i<m; i++) {
1475       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1476       row++;
1477       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1478     }
1479     aj = Aloc->j;
1480     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1481 
1482     /* copy over the B part */
1483     Aloc = (Mat_SeqAIJ*)aij->B->data;
1484     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1485     row  = mat->rmap->rstart;
1486     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1487     ct   = cols;
1488     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1489     for (i=0; i<m; i++) {
1490       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1491       row++;
1492       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1493     }
1494     ierr = PetscFree(ct);CHKERRQ(ierr);
1495     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1496     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1497     /*
1498        Every process has to participate in the call that draws the matrix, since the
1499        graphics waits are synchronized across all processes that share the PetscDraw object
1500     */
1501     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1502     if (!rank) {
1503       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1504       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1505     }
1506     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1507     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1508     ierr = MatDestroy(&A);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1514 {
1515   PetscErrorCode ierr;
1516   PetscBool      iascii,isdraw,issocket,isbinary;
1517 
1518   PetscFunctionBegin;
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1523   if (iascii || isdraw || isbinary || issocket) {
1524     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1530 {
1531   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1532   PetscErrorCode ierr;
1533   Vec            bb1 = 0;
1534   PetscBool      hasop;
1535 
1536   PetscFunctionBegin;
1537   if (flag == SOR_APPLY_UPPER) {
1538     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1539     PetscFunctionReturn(0);
1540   }
1541 
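  /* the work vector bb1 is needed whenever more than one iteration runs, the initial
     guess is nonzero, or the Eisenstat variant must update the right-hand side */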
1542   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1543     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1544   }
1545 
1546   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1547     if (flag & SOR_ZERO_INITIAL_GUESS) {
1548       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1549       its--;
1550     }
1551 
1552     while (its--) {
1553       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555 
1556       /* update rhs: bb1 = bb - B*x */
1557       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1558       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1559 
1560       /* local sweep */
1561       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1562     }
1563   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1564     if (flag & SOR_ZERO_INITIAL_GUESS) {
1565       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1566       its--;
1567     }
1568     while (its--) {
1569       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1570       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571 
1572       /* update rhs: bb1 = bb - B*x */
1573       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1574       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1575 
1576       /* local sweep */
1577       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1578     }
1579   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1580     if (flag & SOR_ZERO_INITIAL_GUESS) {
1581       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1582       its--;
1583     }
1584     while (its--) {
1585       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1586       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587 
1588       /* update rhs: bb1 = bb - B*x */
1589       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1590       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1591 
1592       /* local sweep */
1593       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1594     }
1595   } else if (flag & SOR_EISENSTAT) {
1596     Vec xx1;
1597 
1598     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1600 
1601     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     if (!mat->diag) {
1604       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1605       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1606     }
1607     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1608     if (hasop) {
1609       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1610     } else {
1611       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1612     }
1613     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1614 
1615     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1616 
1617     /* local sweep */
1618     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1619     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1620     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1621   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1622 
1623   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1624 
1625   matin->factorerrortype = mat->A->factorerrortype;
1626   PetscFunctionReturn(0);
1627 }
1628 
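/*
   Sketch of how the SOR kernel above is normally reached (a hedged example, not part
   of this file; assumes ksp and pc have been created as usual). PCSOR calls MatSOR(),
   which dispatches here and performs only process-local sweeps:

      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/
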
1629 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1630 {
1631   Mat            aA,aB,Aperm;
1632   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1633   PetscScalar    *aa,*ba;
1634   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1635   PetscSF        rowsf,sf;
1636   IS             parcolp = NULL;
1637   PetscBool      done;
1638   PetscErrorCode ierr;
1639 
1640   PetscFunctionBegin;
1641   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1642   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1643   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1644   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1645 
1646   /* Invert row permutation to find out where my rows should go */
1647   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1648   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1649   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1650   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1651   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1652   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1653 
1654   /* Invert column permutation to find out where my columns should go */
1655   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1656   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1657   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1658   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1659   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1660   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1661   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1662 
1663   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1664   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1665   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1666 
1667   /* Find out where my gcols should go */
1668   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1669   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1670   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1671   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1672   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1673   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1674   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1675   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1676 
1677   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1678   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1679   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1680   for (i=0; i<m; i++) {
1681     PetscInt row = rdest[i],rowner;
1682     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1683     for (j=ai[i]; j<ai[i+1]; j++) {
1684       PetscInt cowner,col = cdest[aj[j]];
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt cowner,col = gcdest[bj[j]];
1691       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1692       if (rowner == cowner) dnnz[i]++;
1693       else onnz[i]++;
1694     }
1695   }
1696   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1701 
1702   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1703   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1705   for (i=0; i<m; i++) {
1706     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1707     PetscInt j0,rowlen;
1708     rowlen = ai[i+1] - ai[i];
1709     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch length m, so insert the row in batches */
1710       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1711       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1712     }
1713     rowlen = bi[i+1] - bi[i];
1714     for (j0=j=0; j<rowlen; j0=j) {
1715       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1716       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1717     }
1718   }
1719   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1723   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1725   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1726   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1727   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1728   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1729   *B = Aperm;
1730   PetscFunctionReturn(0);
1731 }
1732 
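/*
   Hedged usage sketch for the permutation above (not part of this file; rowp and colp
   are assumed to be index sets describing the desired row and column orderings):

      Mat Aperm;
      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);

   Both permutations are applied at once; the result keeps the parallel layout of A.
*/
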
1733 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1734 {
1735   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1740   if (ghosts) *ghosts = aij->garray;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1745 {
1746   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1747   Mat            A    = mat->A,B = mat->B;
1748   PetscErrorCode ierr;
1749   PetscReal      isend[5],irecv[5];
1750 
1751   PetscFunctionBegin;
1752   info->block_size = 1.0;
1753   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1754 
1755   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1756   isend[3] = info->memory;  isend[4] = info->mallocs;
1757 
1758   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1759 
1760   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1761   isend[3] += info->memory;  isend[4] += info->mallocs;
1762   if (flag == MAT_LOCAL) {
1763     info->nz_used      = isend[0];
1764     info->nz_allocated = isend[1];
1765     info->nz_unneeded  = isend[2];
1766     info->memory       = isend[3];
1767     info->mallocs      = isend[4];
1768   } else if (flag == MAT_GLOBAL_MAX) {
1769     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1770 
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   } else if (flag == MAT_GLOBAL_SUM) {
1777     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1778 
1779     info->nz_used      = irecv[0];
1780     info->nz_allocated = irecv[1];
1781     info->nz_unneeded  = irecv[2];
1782     info->memory       = irecv[3];
1783     info->mallocs      = irecv[4];
1784   }
1785   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1786   info->fill_ratio_needed = 0;
1787   info->factor_mallocs    = 0;
1788   PetscFunctionReturn(0);
1789 }
1790 
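/*
   Hedged usage sketch for the statistics gathered above (not part of this file):

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL returns the per-process sums over the diagonal and off-diagonal blocks;
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce those values over the communicator.
*/
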
1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1792 {
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   switch (op) {
1798   case MAT_NEW_NONZERO_LOCATIONS:
1799   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801   case MAT_KEEP_NONZERO_PATTERN:
1802   case MAT_NEW_NONZERO_LOCATION_ERR:
1803   case MAT_USE_INODES:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     MatCheckPreallocated(A,1);
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_ROW_ORIENTED:
1810     MatCheckPreallocated(A,1);
1811     a->roworiented = flg;
1812 
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_NEW_DIAGONALS:
1817     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1818     break;
1819   case MAT_IGNORE_OFF_PROC_ENTRIES:
1820     a->donotstash = flg;
1821     break;
1822   case MAT_SPD:
1823     A->spd_set = PETSC_TRUE;
1824     A->spd     = flg;
1825     if (flg) {
1826       A->symmetric                  = PETSC_TRUE;
1827       A->structurally_symmetric     = PETSC_TRUE;
1828       A->symmetric_set              = PETSC_TRUE;
1829       A->structurally_symmetric_set = PETSC_TRUE;
1830     }
1831     break;
1832   case MAT_SYMMETRIC:
1833     MatCheckPreallocated(A,1);
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837     MatCheckPreallocated(A,1);
1838     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1839     break;
1840   case MAT_HERMITIAN:
1841     MatCheckPreallocated(A,1);
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   case MAT_SYMMETRY_ETERNAL:
1845     MatCheckPreallocated(A,1);
1846     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1847     break;
1848   case MAT_SUBMAT_SINGLEIS:
1849     A->submat_singleis = flg;
1850     break;
1851   case MAT_STRUCTURE_ONLY:
1852     /* The option is handled directly by MatSetOption() */
1853     break;
1854   default:
1855     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1856   }
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1861 {
1862   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1863   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1864   PetscErrorCode ierr;
1865   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1866   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1867   PetscInt       *cmap,*idx_p;
1868 
1869   PetscFunctionBegin;
1870   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1871   mat->getrowactive = PETSC_TRUE;
1872 
1873   if (!mat->rowvalues && (idx || v)) {
1874     /*
1875         allocate enough space to hold information from the longest row.
1876     */
1877     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1878     PetscInt   max = 1,tmp;
1879     for (i=0; i<matin->rmap->n; i++) {
1880       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1881       if (max < tmp) max = tmp;
1882     }
1883     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1884   }
1885 
1886   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1887   lrow = row - rstart;
1888 
1889   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1890   if (!v)   {pvA = 0; pvB = 0;}
1891   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1892   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1893   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1894   nztot = nzA + nzB;
1895 
1896   cmap = mat->garray;
1897   if (v  || idx) {
1898     if (nztot) {
1899       /* Sort by increasing column numbers, assuming A and B already sorted */
1900       PetscInt imark = -1;
1901       if (v) {
1902         *v = v_p = mat->rowvalues;
1903         for (i=0; i<nzB; i++) {
1904           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1905           else break;
1906         }
1907         imark = i;
1908         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1909         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1910       }
1911       if (idx) {
1912         *idx = idx_p = mat->rowindices;
1913         if (imark > -1) {
1914           for (i=0; i<imark; i++) {
1915             idx_p[i] = cmap[cworkB[i]];
1916           }
1917         } else {
1918           for (i=0; i<nzB; i++) {
1919             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1920             else break;
1921           }
1922           imark = i;
1923         }
1924         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1925         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1926       }
1927     } else {
1928       if (idx) *idx = 0;
1929       if (v)   *v   = 0;
1930     }
1931   }
1932   *nz  = nztot;
1933   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1934   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1939 {
1940   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1941 
1942   PetscFunctionBegin;
1943   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1944   aij->getrowactive = PETSC_FALSE;
1945   PetscFunctionReturn(0);
1946 }
1947 
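/*
   Hedged usage sketch for the MatGetRow()/MatRestoreRow() pair implemented above
   (not part of this file). Only locally owned rows may be queried, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next row is requested:

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
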
1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1949 {
1950   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1951   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1952   PetscErrorCode ierr;
1953   PetscInt       i,j,cstart = mat->cmap->rstart;
1954   PetscReal      sum = 0.0;
1955   MatScalar      *v;
1956 
1957   PetscFunctionBegin;
1958   if (aij->size == 1) {
1959     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1960   } else {
1961     if (type == NORM_FROBENIUS) {
1962       v = amat->a;
1963       for (i=0; i<amat->nz; i++) {
1964         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1965       }
1966       v = bmat->a;
1967       for (i=0; i<bmat->nz; i++) {
1968         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1969       }
1970       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       *norm = PetscSqrtReal(*norm);
1972       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1973     } else if (type == NORM_1) { /* max column norm */
1974       PetscReal *tmp,*tmp2;
1975       PetscInt  *jj,*garray = aij->garray;
1976       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1977       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1978       *norm = 0.0;
1979       v     = amat->a; jj = amat->j;
1980       for (j=0; j<amat->nz; j++) {
1981         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1982       }
1983       v = bmat->a; jj = bmat->j;
1984       for (j=0; j<bmat->nz; j++) {
1985         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1986       }
1987       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1988       for (j=0; j<mat->cmap->N; j++) {
1989         if (tmp2[j] > *norm) *norm = tmp2[j];
1990       }
1991       ierr = PetscFree(tmp);CHKERRQ(ierr);
1992       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1993       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1994     } else if (type == NORM_INFINITY) { /* max row norm */
1995       PetscReal ntemp = 0.0;
1996       for (j=0; j<aij->A->rmap->n; j++) {
1997         v   = amat->a + amat->i[j];
1998         sum = 0.0;
1999         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         v = bmat->a + bmat->i[j];
2003         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         if (sum > ntemp) ntemp = sum;
2007       }
2008       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2009       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2010     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2011   }
2012   PetscFunctionReturn(0);
2013 }
2014 
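/*
   Hedged usage sketch for the norms computed above (not part of this file).
   NORM_FROBENIUS sums |a_ij|^2 over both blocks and reduces with MPI_SUM;
   NORM_1 is the maximum column sum; NORM_INFINITY is the maximum row sum.

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
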
2015 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2016 {
2017   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2018   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2019   PetscErrorCode ierr;
2020   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2021   PetscInt       cstart = A->cmap->rstart,ncol;
2022   Mat            B;
2023   MatScalar      *array;
2024 
2025   PetscFunctionBegin;
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2031     PetscSFNode          *oloc;
2032     PETSC_UNUSED PetscSF sf;
2033 
2034     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2035     /* compute d_nnz for preallocation */
2036     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2037     for (i=0; i<ai[ma]; i++) {
2038       d_nnz[aj[i]]++;
2039       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2040     }
2041     /* compute local off-diagonal contributions */
2042     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2043     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2044     /* map those to global */
2045     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2046     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2047     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2048     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2049     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2051     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2052 
2053     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2054     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2055     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2056     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2057     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2058     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2059   } else {
2060     B    = *matout;
2061     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2062     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2063   }
2064 
2065   /* copy over the A part */
2066   array = Aloc->a;
2067   row   = A->rmap->rstart;
2068   for (i=0; i<ma; i++) {
2069     ncol = ai[i+1]-ai[i];
2070     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2071     row++;
2072     array += ncol; aj += ncol;
2073   }
2074   aj = Aloc->j;
2075   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2076 
2077   /* copy over the B part */
2078   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2079   array = Bloc->a;
2080   row   = A->rmap->rstart;
2081   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2082   cols_tmp = cols;
2083   for (i=0; i<mb; i++) {
2084     ncol = bi[i+1]-bi[i];
2085     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2086     row++;
2087     array += ncol; cols_tmp += ncol;
2088   }
2089   ierr = PetscFree(cols);CHKERRQ(ierr);
2090 
2091   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2093   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2094     *matout = B;
2095   } else {
2096     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2097   }
2098   PetscFunctionReturn(0);
2099 }
2100 
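/*
   Hedged usage sketch for the transpose above (not part of this file):

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   With MAT_INITIAL_MATRIX the preallocation for the result is computed first (the
   d_nnz/o_nnz pass above); with MAT_REUSE_MATRIX the existing pattern of *matout is
   reused, and the in-place case goes through MatHeaderMerge().
*/
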
2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2102 {
2103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2104   Mat            a    = aij->A,b = aij->B;
2105   PetscErrorCode ierr;
2106   PetscInt       s1,s2,s3;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2110   if (rr) {
2111     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2112     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2113     /* Overlap communication with computation. */
2114     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115   }
2116   if (ll) {
2117     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2118     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2119     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2120   }
2121   /* scale the diagonal block */
2122   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2123 
2124   if (rr) {
2125     /* Do a scatter end and then right scale the off-diagonal block */
2126     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2127     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
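/*
   Hedged usage sketch for the scaling above (not part of this file; l and r are
   assumed to be vectors conforming to the row and column layouts of A, and either
   may be NULL to skip that side):

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   This computes A = diag(l)*A*diag(r), overlapping the scatter of r with the scaling
   of the diagonal block as done above.
*/
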
2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2143 {
2144   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2145   Mat            a,b,c,d;
2146   PetscBool      flg;
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   a = matA->A; b = matA->B;
2151   c = matB->A; d = matB->B;
2152 
2153   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2154   if (flg) {
2155     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2156   }
2157   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2162 {
2163   PetscErrorCode ierr;
2164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2166 
2167   PetscFunctionBegin;
2168   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2169   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2170     /* because of the column compression in the off-processor part of the matrix a->B,
2171        the number of columns in a->B and b->B may be different, hence we cannot call
2172        MatCopy() directly on the two parts. If need be, we could provide a copy more
2173        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2174        then copying the submatrices */
2175     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2176   } else {
2177     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2178     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2179   }
2180   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2185 {
2186   PetscErrorCode ierr;
2187 
2188   PetscFunctionBegin;
2189   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /*
2194    Computes the number of nonzeros per row needed for preallocation when X and Y
2195    have different nonzero structure.
2196 */
2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2198 {
2199   PetscInt       i,j,k,nzx,nzy;
2200 
2201   PetscFunctionBegin;
2202   /* Set the number of nonzeros in the new matrix */
2203   for (i=0; i<m; i++) {
2204     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2205     nzx = xi[i+1] - xi[i];
2206     nzy = yi[i+1] - yi[i];
2207     nnz[i] = 0;
2208     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2209       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2210       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2211       nnz[i]++;
2212     }
2213     for (; k<nzy; k++) nnz[i]++;
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
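/*
   Worked example of the merge count above: if row i of X has global columns {0,3,5}
   and row i of Y has global columns {1,3}, the loop counts 0, catches up with 1,
   skips the duplicate 3, and finishes with 5, giving nnz[i] = 4 = |{0,1,3,5}|.
*/
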
2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2220 {
2221   PetscErrorCode ierr;
2222   PetscInt       m = Y->rmap->N;
2223   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2224   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2232 {
2233   PetscErrorCode ierr;
2234   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2235   PetscBLASInt   bnz,one=1;
2236   Mat_SeqAIJ     *x,*y;
2237 
2238   PetscFunctionBegin;
2239   if (str == SAME_NONZERO_PATTERN) {
2240     PetscScalar alpha = a;
2241     x    = (Mat_SeqAIJ*)xx->A->data;
2242     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2243     y    = (Mat_SeqAIJ*)yy->A->data;
2244     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2245     x    = (Mat_SeqAIJ*)xx->B->data;
2246     y    = (Mat_SeqAIJ*)yy->B->data;
2247     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2248     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2249     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2250   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2251     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2252   } else {
2253     Mat      B;
2254     PetscInt *nnz_d,*nnz_o;
2255     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2256     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2257     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2258     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2259     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2260     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2261     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2262     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2263     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2264     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2265     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2266     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2267     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2268     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2269   }
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2274 
2275 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2276 {
2277 #if defined(PETSC_USE_COMPLEX)
2278   PetscErrorCode ierr;
2279   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2283   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2284 #else
2285   PetscFunctionBegin;
2286 #endif
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2291 {
2292   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2297   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2302 {
2303   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2304   PetscErrorCode ierr;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2308   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2313 {
2314   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2315   PetscErrorCode ierr;
2316   PetscInt       i,*idxb = 0;
2317   PetscScalar    *va,*vb;
2318   Vec            vtmp;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2322   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2323   if (idx) {
2324     for (i=0; i<A->rmap->n; i++) {
2325       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2326     }
2327   }
2328 
2329   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2330   if (idx) {
2331     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2332   }
2333   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2334   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2335 
2336   for (i=0; i<A->rmap->n; i++) {
2337     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2338       va[i] = vb[i];
2339       if (idx) idx[i] = a->garray[idxb[i]];
2340     }
2341   }
2342 
2343   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2344   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2345   ierr = PetscFree(idxb);CHKERRQ(ierr);
2346   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2351 {
2352   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2353   PetscErrorCode ierr;
2354   PetscInt       i,*idxb = 0;
2355   PetscScalar    *va,*vb;
2356   Vec            vtmp;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2361   if (idx) {
2362     for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row */
2363       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2364     }
2365   }
2366 
2367   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2368   if (idx) {
2369     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2370   }
2371   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2372   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2373 
2374   for (i=0; i<A->rmap->n; i++) {
2375     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2376       va[i] = vb[i];
2377       if (idx) idx[i] = a->garray[idxb[i]];
2378     }
2379   }
2380 
2381   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2382   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2383   ierr = PetscFree(idxb);CHKERRQ(ierr);
2384   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2389 {
2390   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2391   PetscInt       n      = A->rmap->n;
2392   PetscInt       cstart = A->cmap->rstart;
2393   PetscInt       *cmap  = mat->garray;
2394   PetscInt       *diagIdx, *offdiagIdx;
2395   Vec            diagV, offdiagV;
2396   PetscScalar    *a, *diagA, *offdiagA;
2397   PetscInt       r;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2402   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2403   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2404   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2405   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2406   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2407   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2408   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2409   for (r = 0; r < n; ++r) {
2410     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2411       a[r]   = diagA[r];
2412       idx[r] = cstart + diagIdx[r];
2413     } else {
2414       a[r]   = offdiagA[r];
2415       idx[r] = cmap[offdiagIdx[r]];
2416     }
2417   }
2418   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2420   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2421   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2422   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2423   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2428 {
2429   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2430   PetscInt       n      = A->rmap->n;
2431   PetscInt       cstart = A->cmap->rstart;
2432   PetscInt       *cmap  = mat->garray;
2433   PetscInt       *diagIdx, *offdiagIdx;
2434   Vec            diagV, offdiagV;
2435   PetscScalar    *a, *diagA, *offdiagA;
2436   PetscInt       r;
2437   PetscErrorCode ierr;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2441   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2442   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2443   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2444   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2446   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2447   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2448   for (r = 0; r < n; ++r) {
2449     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2450       a[r]   = diagA[r];
2451       idx[r] = cstart + diagIdx[r];
2452     } else {
2453       a[r]   = offdiagA[r];
2454       idx[r] = cmap[offdiagIdx[r]];
2455     }
2456   }
2457   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2459   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2460   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2461   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2462   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2467 {
2468   PetscErrorCode ierr;
2469   Mat            *dummy;
2470 
2471   PetscFunctionBegin;
2472   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2473   *newmat = *dummy;
2474   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2479 {
2480   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2485   A->factorerrortype = a->A->factorerrortype;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2490 {
2491   PetscErrorCode ierr;
2492   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2493 
2494   PetscFunctionBegin;
2495   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2496   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2497   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2498   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2503 {
2504   PetscFunctionBegin;
2505   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2506   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 /*@
2511    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2512 
2513    Collective on Mat
2514 
2515    Input Parameters:
2516 +    A - the matrix
2517 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2518 
2519    Level: advanced
2520 
2521 @*/
2522 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2523 {
2524   PetscErrorCode       ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
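/*
   Hedged usage sketch (not part of this file): enable the scalable overlap algorithm
   before the overlap is actually computed, e.g. ahead of a MatIncreaseOverlap() call
   or a PCASM setup; is[] below is an assumed array of nis index sets.

      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);

   The same switch is available from the options database as
   -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() below).
*/
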
2531 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2532 {
2533   PetscErrorCode       ierr;
2534   PetscBool            sc = PETSC_FALSE,flg;
2535 
2536   PetscFunctionBegin;
2537   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2539   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2540   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2541   if (flg) {
2542     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2543   }
2544   ierr = PetscOptionsTail();CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2549 {
2550   PetscErrorCode ierr;
2551   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2552   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2553 
2554   PetscFunctionBegin;
2555   if (!Y->preallocated) {
2556     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2557   } else if (!aij->nz) {
2558     PetscInt nonew = aij->nonew; /* save the original nonew value */
2559     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2560     aij->nonew = nonew; /* restore nonew so the new-nonzero behavior is unchanged */
2561   }
2562   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2567 {
2568   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2569   PetscErrorCode ierr;
2570 
2571   PetscFunctionBegin;
2572   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2573   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2574   if (d) {
2575     PetscInt rstart;
2576     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2577     *d += rstart;
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 
2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2586                                        MatGetRow_MPIAIJ,
2587                                        MatRestoreRow_MPIAIJ,
2588                                        MatMult_MPIAIJ,
2589                                 /* 4*/ MatMultAdd_MPIAIJ,
2590                                        MatMultTranspose_MPIAIJ,
2591                                        MatMultTransposeAdd_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*10*/ 0,
2596                                        0,
2597                                        0,
2598                                        MatSOR_MPIAIJ,
2599                                        MatTranspose_MPIAIJ,
2600                                 /*15*/ MatGetInfo_MPIAIJ,
2601                                        MatEqual_MPIAIJ,
2602                                        MatGetDiagonal_MPIAIJ,
2603                                        MatDiagonalScale_MPIAIJ,
2604                                        MatNorm_MPIAIJ,
2605                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2606                                        MatAssemblyEnd_MPIAIJ,
2607                                        MatSetOption_MPIAIJ,
2608                                        MatZeroEntries_MPIAIJ,
2609                                 /*24*/ MatZeroRows_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*29*/ MatSetUp_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        MatGetDiagonalBlock_MPIAIJ,
2618                                        0,
2619                                 /*34*/ MatDuplicate_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                 /*39*/ MatAXPY_MPIAIJ,
2625                                        MatCreateSubMatrices_MPIAIJ,
2626                                        MatIncreaseOverlap_MPIAIJ,
2627                                        MatGetValues_MPIAIJ,
2628                                        MatCopy_MPIAIJ,
2629                                 /*44*/ MatGetRowMax_MPIAIJ,
2630                                        MatScale_MPIAIJ,
2631                                        MatShift_MPIAIJ,
2632                                        MatDiagonalSet_MPIAIJ,
2633                                        MatZeroRowsColumns_MPIAIJ,
2634                                 /*49*/ MatSetRandom_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2640                                        0,
2641                                        MatSetUnfactored_MPIAIJ,
2642                                        MatPermute_MPIAIJ,
2643                                        0,
2644                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2645                                        MatDestroy_MPIAIJ,
2646                                        MatView_MPIAIJ,
2647                                        0,
2648                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2650                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655                                        MatGetRowMinAbs_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*75*/ MatFDColoringApply_AIJ,
2661                                        MatSetFromOptions_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        MatFindZeroDiagonals_MPIAIJ,
2665                                 /*80*/ 0,
2666                                        0,
2667                                        0,
2668                                 /*83*/ MatLoad_MPIAIJ,
2669                                        MatIsSymmetric_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2675                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2676                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2677                                        MatPtAP_MPIAIJ_MPIAIJ,
2678                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2679                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*99*/ 0,
2685                                        0,
2686                                        0,
2687                                        MatConjugate_MPIAIJ,
2688                                        0,
2689                                 /*104*/MatSetValuesRow_MPIAIJ,
2690                                        MatRealPart_MPIAIJ,
2691                                        MatImaginaryPart_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                 /*109*/0,
2695                                        0,
2696                                        MatGetRowMin_MPIAIJ,
2697                                        0,
2698                                        MatMissingDiagonal_MPIAIJ,
2699                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2700                                        0,
2701                                        MatGetGhosts_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                 /*119*/0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        MatGetMultiProcBlock_MPIAIJ,
2709                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2710                                        MatGetColumnNorms_MPIAIJ,
2711                                        MatInvertBlockDiagonal_MPIAIJ,
2712                                        0,
2713                                        MatCreateSubMatricesMPI_MPIAIJ,
2714                                 /*129*/0,
2715                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2716                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2717                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2718                                        0,
2719                                 /*134*/0,
2720                                        0,
2721                                        MatRARt_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                 /*139*/MatSetBlockSizes_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                        MatFDColoringSetUp_MPIXAIJ,
2728                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2729                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2730 };
2731 
2732 /* ----------------------------------------------------------------------------------------*/
2733 
2734 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2757 {
2758   Mat_MPIAIJ     *b;
2759   PetscErrorCode ierr;
2760 
2761   PetscFunctionBegin;
2762   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2763   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2764   b = (Mat_MPIAIJ*)B->data;
2765 
2766 #if defined(PETSC_USE_CTABLE)
2767   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2768 #else
2769   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2770 #endif
2771   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2772   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2773   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2774 
2775   /* Because B will have been resized we simply destroy it and create a new one each time */
2776   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2777   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2778   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2779   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2780   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2781   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2782 
2783   if (!B->preallocated) {
2784     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2785     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2786     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2787     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2792   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2793   B->preallocated  = PETSC_TRUE;
2794   B->was_assembled = PETSC_FALSE;
2795   B->assembled     = PETSC_FALSE;
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2800 {
2801   Mat_MPIAIJ     *b;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2806   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2807   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2808   b = (Mat_MPIAIJ*)B->data;
2809 
2810 #if defined(PETSC_USE_CTABLE)
2811   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2812 #else
2813   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2814 #endif
2815   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2816   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2817   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2818 
2819   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2820   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2821   B->preallocated  = PETSC_TRUE;
2822   B->was_assembled = PETSC_FALSE;
2823   B->assembled = PETSC_FALSE;
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2828 {
2829   Mat            mat;
2830   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2831   PetscErrorCode ierr;
2832 
2833   PetscFunctionBegin;
2834   *newmat = 0;
2835   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2836   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2837   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2838   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2839   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2840   a       = (Mat_MPIAIJ*)mat->data;
2841 
2842   mat->factortype   = matin->factortype;
2843   mat->assembled    = PETSC_TRUE;
2844   mat->insertmode   = NOT_SET_VALUES;
2845   mat->preallocated = PETSC_TRUE;
2846 
2847   a->size         = oldmat->size;
2848   a->rank         = oldmat->rank;
2849   a->donotstash   = oldmat->donotstash;
2850   a->roworiented  = oldmat->roworiented;
2851   a->rowindices   = 0;
2852   a->rowvalues    = 0;
2853   a->getrowactive = PETSC_FALSE;
2854 
2855   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2856   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2857 
2858   if (oldmat->colmap) {
2859 #if defined(PETSC_USE_CTABLE)
2860     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2861 #else
2862     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2863     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2864     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2865 #endif
2866   } else a->colmap = 0;
2867   if (oldmat->garray) {
2868     PetscInt len;
2869     len  = oldmat->B->cmap->n;
2870     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2871     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2872     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2873   } else a->garray = 0;
2874 
2875   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2876   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2877   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2878   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2879 
2880   if (oldmat->Mvctx_mpi1) {
2881     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2882     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2883   }
2884 
2885   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2886   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2887   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2888   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2889   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2890   *newmat = mat;
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2895 {
2896   PetscScalar    *vals,*svals;
2897   MPI_Comm       comm;
2898   PetscErrorCode ierr;
2899   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2900   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2901   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2902   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2903   PetscInt       cend,cstart,n,*rowners;
2904   int            fd;
2905   PetscInt       bs = newMat->rmap->bs;
2906 
2907   PetscFunctionBegin;
2908   /* force binary viewer to load .info file if it has not yet done so */
2909   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2910   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2911   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2912   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2913   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2914   if (!rank) {
2915     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2916     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2917     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2918   }
2919 
2920   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2921   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2922   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2923   if (bs < 0) bs = 1;
2924 
2925   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2926   M    = header[1]; N = header[2];
2927 
2928   /* If global sizes are set, check if they are consistent with that given in the file */
2929   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2930   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2931 
2932   /* determine ownership of all (block) rows */
2933   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2934   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2935   else m = newMat->rmap->n; /* Set by user */
2936 
2937   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2938   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2939 
2940   /* First process needs enough room for process with most rows */
2941   if (!rank) {
2942     mmax = rowners[1];
2943     for (i=2; i<=size; i++) {
2944       mmax = PetscMax(mmax, rowners[i]);
2945     }
2946   } else mmax = -1;             /* unused, but compilers complain */
2947 
2948   rowners[0] = 0;
2949   for (i=2; i<=size; i++) {
2950     rowners[i] += rowners[i-1];
2951   }
2952   rstart = rowners[rank];
2953   rend   = rowners[rank+1];
2954 
2955   /* distribute row lengths to all processors */
2956   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2957   if (!rank) {
2958     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2959     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2960     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2961     for (j=0; j<m; j++) {
2962       procsnz[0] += ourlens[j];
2963     }
2964     for (i=1; i<size; i++) {
2965       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2966       /* calculate the number of nonzeros on each processor */
2967       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2968         procsnz[i] += rowlengths[j];
2969       }
2970       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2971     }
2972     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2973   } else {
2974     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2975   }
2976 
2977   if (!rank) {
2978     /* determine max buffer needed and allocate it */
2979     maxnz = 0;
2980     for (i=0; i<size; i++) {
2981       maxnz = PetscMax(maxnz,procsnz[i]);
2982     }
2983     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2984 
2985     /* read in my part of the matrix column indices  */
2986     nz   = procsnz[0];
2987     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2988     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2989 
2990     /* read in everyone else's and ship it off */
2991     for (i=1; i<size; i++) {
2992       nz   = procsnz[i];
2993       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2994       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2995     }
2996     ierr = PetscFree(cols);CHKERRQ(ierr);
2997   } else {
2998     /* determine buffer space needed for message */
2999     nz = 0;
3000     for (i=0; i<m; i++) {
3001       nz += ourlens[i];
3002     }
3003     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3004 
3005     /* receive message of column indices */
3006     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3007   }
3008 
3009   /* determine column ownership if matrix is not square */
3010   if (N != M) {
3011     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3012     else n = newMat->cmap->n;
3013     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3014     cstart = cend - n;
3015   } else {
3016     cstart = rstart;
3017     cend   = rend;
3018     n      = cend - cstart;
3019   }
3020 
3021   /* loop over local rows, determining number of off diagonal entries */
3022   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3023   jj   = 0;
3024   for (i=0; i<m; i++) {
3025     for (j=0; j<ourlens[i]; j++) {
3026       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3027       jj++;
3028     }
3029   }
3030 
3031   for (i=0; i<m; i++) {
3032     ourlens[i] -= offlens[i];
3033   }
3034   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3035 
3036   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3037 
3038   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3039 
3040   for (i=0; i<m; i++) {
3041     ourlens[i] += offlens[i];
3042   }
3043 
3044   if (!rank) {
3045     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3046 
3047     /* read in my part of the matrix numerical values  */
3048     nz   = procsnz[0];
3049     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3050 
3051     /* insert into matrix */
3052     jj      = rstart;
3053     smycols = mycols;
3054     svals   = vals;
3055     for (i=0; i<m; i++) {
3056       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3057       smycols += ourlens[i];
3058       svals   += ourlens[i];
3059       jj++;
3060     }
3061 
3062     /* read in other processors and ship out */
3063     for (i=1; i<size; i++) {
3064       nz   = procsnz[i];
3065       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3066       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3067     }
3068     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3069   } else {
3070     /* receive numeric values */
3071     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3072 
3073     /* receive message of values */
3074     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3075 
3076     /* insert into matrix */
3077     jj      = rstart;
3078     smycols = mycols;
3079     svals   = vals;
3080     for (i=0; i<m; i++) {
3081       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3082       smycols += ourlens[i];
3083       svals   += ourlens[i];
3084       jj++;
3085     }
3086   }
3087   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3088   ierr = PetscFree(vals);CHKERRQ(ierr);
3089   ierr = PetscFree(mycols);CHKERRQ(ierr);
3090   ierr = PetscFree(rowners);CHKERRQ(ierr);
3091   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3092   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 /* Not scalable because of ISAllGather() unless getting all columns. */
3097 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3098 {
3099   PetscErrorCode ierr;
3100   IS             iscol_local;
3101   PetscBool      isstride;
3102   PetscMPIInt    lisstride=0,gisstride;
3103 
3104   PetscFunctionBegin;
3105   /* check if we are grabbing all columns */
3106   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3107 
3108   if (isstride) {
3109     PetscInt  start,len,mstart,mlen;
3110     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3111     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3112     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3113     if (mstart == start && mlen-mstart == len) lisstride = 1;
3114   }
3115 
3116   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3117   if (gisstride) {
3118     PetscInt N;
3119     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3120     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3121     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3122     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3123   } else {
3124     PetscInt cbs;
3125     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3126     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3127     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3128   }
3129 
3130   *isseq = iscol_local;
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 /*
3135  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3136  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3137 
3138  Input Parameters:
3139    mat - matrix
3140    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3141            i.e., mat->rstart <= isrow[i] < mat->rend
3142    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3143            i.e., mat->cstart <= iscol[i] < mat->cend
3144  Output Parameters:
3145    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3146    iscol_o - sequential column index set for retrieving mat->B
3147    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3148  */
3149 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3150 {
3151   PetscErrorCode ierr;
3152   Vec            x,cmap;
3153   const PetscInt *is_idx;
3154   PetscScalar    *xarray,*cmaparray;
3155   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3156   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3157   Mat            B=a->B;
3158   Vec            lvec=a->lvec,lcmap;
3159   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3160   MPI_Comm       comm;
3161   VecScatter     Mvctx=a->Mvctx;
3162 
3163   PetscFunctionBegin;
3164   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3165   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3166 
3167   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3168   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3169   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3170   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3171   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3172 
3173   /* Get start indices */
3174   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3175   isstart -= ncols;
3176   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3177 
3178   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3179   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3180   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3181   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3182   for (i=0; i<ncols; i++) {
3183     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3184     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3185     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3186   }
3187   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3188   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3189   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3190 
3191   /* Get iscol_d */
3192   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3193   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3194   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3195 
3196   /* Get isrow_d */
3197   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3198   rstart = mat->rmap->rstart;
3199   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3200   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3201   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3202   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3203 
3204   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3205   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3206   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3207 
3208   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3209   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3211 
3212   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3213 
3214   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3215   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3216 
3217   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3218   /* off-process column indices */
3219   count = 0;
3220   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3221   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3222 
3223   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3224   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3225   for (i=0; i<Bn; i++) {
3226     if (PetscRealPart(xarray[i]) > -1.0) {
3227       idx[count]     = i;                   /* local column index in off-diagonal part B */
3228       cmap1[count]   = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3229       count++;
3230     }
3231   }
3232   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3233   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3234 
3235   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3236   /* cannot ensure iscol_o has same blocksize as iscol! */
3237 
3238   ierr = PetscFree(idx);CHKERRQ(ierr);
3239   *garray = cmap1;
3240 
3241   ierr = VecDestroy(&x);CHKERRQ(ierr);
3242   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3243   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3244   PetscFunctionReturn(0);
3245 }
3246 
3247 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3248 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3249 {
3250   PetscErrorCode ierr;
3251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3252   Mat            M = NULL;
3253   MPI_Comm       comm;
3254   IS             iscol_d,isrow_d,iscol_o;
3255   Mat            Asub = NULL,Bsub = NULL;
3256   PetscInt       n;
3257 
3258   PetscFunctionBegin;
3259   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3260 
3261   if (call == MAT_REUSE_MATRIX) {
3262     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3263     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3264     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3265 
3266     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3267     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3268 
3269     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3270     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3271 
3272     /* Update diagonal and off-diagonal portions of submat */
3273     asub = (Mat_MPIAIJ*)(*submat)->data;
3274     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3275     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3276     if (n) {
3277       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3278     }
3279     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3281 
3282   } else { /* call == MAT_INITIAL_MATRIX */
3283     const PetscInt *garray;
3284     PetscInt        BsubN;
3285 
3286     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3287     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3288 
3289     /* Create local submatrices Asub and Bsub */
3290     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3291     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3292 
3293     /* Create submatrix M */
3294     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3295 
3296     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3297     asub = (Mat_MPIAIJ*)M->data;
3298 
3299     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3300     n = asub->B->cmap->N;
3301     if (BsubN > n) {
3302       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3303       const PetscInt *idx;
3304       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3305       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3306 
3307       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3308       j = 0;
3309       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3310       for (i=0; i<n; i++) {
3311         if (j >= BsubN) break;
3312         while (subgarray[i] > garray[j]) j++;
3313 
3314         if (subgarray[i] == garray[j]) {
3315           idx_new[i] = idx[j++];
3316         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3317       }
3318       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3319 
3320       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3321       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3322 
3323     } else if (BsubN < n) {
3324       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3325     }
3326 
3327     ierr = PetscFree(garray);CHKERRQ(ierr);
3328     *submat = M;
3329 
3330     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3331     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3332     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3333 
3334     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3335     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3336 
3337     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3338     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3339   }
3340   PetscFunctionReturn(0);
3341 }
3342 
3343 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3344 {
3345   PetscErrorCode ierr;
3346   IS             iscol_local=NULL,isrow_d;
3347   PetscInt       csize;
3348   PetscInt       n,i,j,start,end;
3349   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3350   MPI_Comm       comm;
3351 
3352   PetscFunctionBegin;
3353   /* If isrow has same processor distribution as mat,
3354      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3355   if (call == MAT_REUSE_MATRIX) {
3356     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3357     if (isrow_d) {
3358       sameRowDist  = PETSC_TRUE;
3359       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3360     } else {
3361       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3362       if (iscol_local) {
3363         sameRowDist  = PETSC_TRUE;
3364         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3365       }
3366     }
3367   } else {
3368     /* Check if isrow has same processor distribution as mat */
3369     sameDist[0] = PETSC_FALSE;
3370     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3371     if (!n) {
3372       sameDist[0] = PETSC_TRUE;
3373     } else {
3374       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3375       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3376       if (i >= start && j < end) {
3377         sameDist[0] = PETSC_TRUE;
3378       }
3379     }
3380 
3381     /* Check if iscol has same processor distribution as mat */
3382     sameDist[1] = PETSC_FALSE;
3383     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3384     if (!n) {
3385       sameDist[1] = PETSC_TRUE;
3386     } else {
3387       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3388       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3389       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3390     }
3391 
3392     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3393     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3394     sameRowDist = tsameDist[0];
3395   }
3396 
3397   if (sameRowDist) {
3398     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3399       /* isrow and iscol have same processor distribution as mat */
3400       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3401       PetscFunctionReturn(0);
3402     } else { /* sameRowDist */
3403       /* isrow has same processor distribution as mat */
3404       if (call == MAT_INITIAL_MATRIX) {
3405         PetscBool sorted;
3406         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3407         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3408         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3409         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3410 
3411         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3412         if (sorted) {
3413           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3414           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3415           PetscFunctionReturn(0);
3416         }
3417       } else { /* call == MAT_REUSE_MATRIX */
3418         IS    iscol_sub;
3419         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3420         if (iscol_sub) {
3421           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3422           PetscFunctionReturn(0);
3423         }
3424       }
3425     }
3426   }
3427 
3428   /* General case: iscol -> iscol_local which has global size of iscol */
3429   if (call == MAT_REUSE_MATRIX) {
3430     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3431     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3432   } else {
3433     if (!iscol_local) {
3434       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3435     }
3436   }
3437 
3438   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3439   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3440 
3441   if (call == MAT_INITIAL_MATRIX) {
3442     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3443     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3444   }
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 /*@C
3449      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3450          and "off-diagonal" parts of the matrix in CSR format.
3451 
3452    Collective on MPI_Comm
3453 
3454    Input Parameters:
3455 +  comm - MPI communicator
3456 .  A - "diagonal" portion of matrix
3457 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3458 -  garray - global index of B columns
3459 
3460    Output Parameter:
3461 .   mat - the matrix, with input A as its local diagonal matrix

3462    Level: advanced
3463 
3464    Notes:
3465        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3466        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
3467 
3468 .seealso: MatCreateMPIAIJWithSplitArrays()
3469 @*/
3470 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3471 {
3472   PetscErrorCode ierr;
3473   Mat_MPIAIJ     *maij;
3474   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3475   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3476   PetscScalar    *oa=b->a;
3477   Mat            Bnew;
3478   PetscInt       m,n,N;
3479 
3480   PetscFunctionBegin;
3481   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3482   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3483   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3484   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3485   /* The check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3486   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3487 
3488   /* Get global columns of mat */
3489   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3490 
3491   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3492   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3493   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3494   maij = (Mat_MPIAIJ*)(*mat)->data;
3495 
3496   (*mat)->preallocated = PETSC_TRUE;
3497 
3498   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3499   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3500 
3501   /* Set A as diagonal portion of *mat */
3502   maij->A = A;
3503 
3504   nz = oi[m];
3505   for (i=0; i<nz; i++) {
3506     col   = oj[i];
3507     oj[i] = garray[col];
3508   }
3509 
3510   /* Set Bnew as off-diagonal portion of *mat */
3511   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3512   bnew        = (Mat_SeqAIJ*)Bnew->data;
3513   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3514   maij->B     = Bnew;
3515 
3516   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3517 
3518   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3519   b->free_a       = PETSC_FALSE;
3520   b->free_ij      = PETSC_FALSE;
3521   ierr = MatDestroy(&B);CHKERRQ(ierr);
3522 
3523   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3524   bnew->free_a       = PETSC_TRUE;
3525   bnew->free_ij      = PETSC_TRUE;
3526 
3527   /* condense columns of maij->B */
3528   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3529   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3530   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3531   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3532   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3537 
3538 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3539 {
3540   PetscErrorCode ierr;
3541   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3542   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3543   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3544   Mat            M,Msub,B=a->B;
3545   MatScalar      *aa;
3546   Mat_SeqAIJ     *aij;
3547   PetscInt       *garray = a->garray,*colsub,Ncols;
3548   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3549   IS             iscol_sub,iscmap;
3550   const PetscInt *is_idx,*cmap;
3551   PetscBool      allcolumns=PETSC_FALSE;
3552   MPI_Comm       comm;
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3556 
3557   if (call == MAT_REUSE_MATRIX) {
3558     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3559     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3560     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3561 
3562     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3563     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3564 
3565     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3566     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3567 
3568     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3569 
3570   } else { /* call == MAT_INITIAL_MATRIX */
3571     PetscBool flg;
3572 
3573     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3574     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3575 
3576     /* (1) iscol -> nonscalable iscol_local */
3577     /* Check for special case: each processor gets entire matrix columns */
3578     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3579     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3580     if (allcolumns) {
3581       iscol_sub = iscol_local;
3582       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3583       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3584 
3585     } else {
3586       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3587       PetscInt *idx,*cmap1,k;
3588       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3589       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3590       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3591       count = 0;
3592       k     = 0;
3593       for (i=0; i<Ncols; i++) {
3594         j = is_idx[i];
3595         if (j >= cstart && j < cend) {
3596           /* diagonal part of mat */
3597           idx[count]     = j;
3598           cmap1[count++] = i; /* column index in submat */
3599         } else if (Bn) {
3600           /* off-diagonal part of mat */
3601           if (j == garray[k]) {
3602             idx[count]     = j;
3603             cmap1[count++] = i;  /* column index in submat */
3604           } else if (j > garray[k]) {
3605             while (j > garray[k] && k < Bn-1) k++;
3606             if (j == garray[k]) {
3607               idx[count]     = j;
3608               cmap1[count++] = i; /* column index in submat */
3609             }
3610           }
3611         }
3612       }
3613       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3614 
3615       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3616       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3617       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3618 
3619       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3620     }
3621 
3622     /* (3) Create sequential Msub */
3623     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3624   }
3625 
3626   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3627   aij  = (Mat_SeqAIJ*)(Msub)->data;
3628   ii   = aij->i;
3629   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3630 
3631   /*
3632       m - number of local rows
3633       Ncols - number of columns (same on all processors)
3634       rstart - first row in new global matrix generated
3635   */
3636   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3637 
3638   if (call == MAT_INITIAL_MATRIX) {
3639     /* (4) Create parallel newmat */
3640     PetscMPIInt    rank,size;
3641     PetscInt       csize;
3642 
3643     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3644     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3645 
3646     /*
3647         Determine the number of non-zeros in the diagonal and off-diagonal
3648         portions of the matrix in order to do correct preallocation
3649     */
3650 
3651     /* first get start and end of "diagonal" columns */
3652     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3653     if (csize == PETSC_DECIDE) {
3654       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3655       if (mglobal == Ncols) { /* square matrix */
3656         nlocal = m;
3657       } else {
3658         nlocal = Ncols/size + ((Ncols % size) > rank);
3659       }
3660     } else {
3661       nlocal = csize;
3662     }
3663     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3664     rstart = rend - nlocal;
3665     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3666 
3667     /* next, compute all the lengths */
3668     jj    = aij->j;
3669     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3670     olens = dlens + m;
3671     for (i=0; i<m; i++) {
3672       jend = ii[i+1] - ii[i];
3673       olen = 0;
3674       dlen = 0;
3675       for (j=0; j<jend; j++) {
3676         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3677         else dlen++;
3678         jj++;
3679       }
3680       olens[i] = olen;
3681       dlens[i] = dlen;
3682     }
3683 
3684     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3685     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3686 
3687     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3688     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3689     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3690     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3691     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3692     ierr = PetscFree(dlens);CHKERRQ(ierr);
3693 
3694   } else { /* call == MAT_REUSE_MATRIX */
3695     M    = *newmat;
3696     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3697     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3698     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3699     /*
3700          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3701        rather than the slower MatSetValues().
3702     */
3703     M->was_assembled = PETSC_TRUE;
3704     M->assembled     = PETSC_FALSE;
3705   }
3706 
3707   /* (5) Set values of Msub to *newmat */
3708   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3709   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3710 
3711   jj   = aij->j;
3712   aa   = aij->a;
3713   for (i=0; i<m; i++) {
3714     row = rstart + i;
3715     nz  = ii[i+1] - ii[i];
3716     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3717     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3718     jj += nz; aa += nz;
3719   }
3720   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3721 
3722   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3723   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724 
3725   ierr = PetscFree(colsub);CHKERRQ(ierr);
3726 
3727   /* save Msub, iscol_sub and iscmap used in processor for next request */
3728   if (call ==  MAT_INITIAL_MATRIX) {
3729     *newmat = M;
3730     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3731     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3732 
3733     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3734     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3735 
3736     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3737     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3738 
3739     if (iscol_local) {
3740       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3741       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3742     }
3743   }
3744   PetscFunctionReturn(0);
3745 }
3746 
3747 /*
3748     Not great since it makes two copies of the submatrix: first a SeqAIJ
3749   on each process, and then the final result by concatenating the local matrices.
3750   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3751 
3752   Note: This requires a sequential iscol with all indices.
3753 */
3754 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3755 {
3756   PetscErrorCode ierr;
3757   PetscMPIInt    rank,size;
3758   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3759   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3760   Mat            M,Mreuse;
3761   MatScalar      *aa,*vwork;
3762   MPI_Comm       comm;
3763   Mat_SeqAIJ     *aij;
3764   PetscBool      colflag,allcolumns=PETSC_FALSE;
3765 
3766   PetscFunctionBegin;
3767   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3768   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3769   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3770 
3771   /* Check for special case: each processor gets entire matrix columns */
3772   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3773   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3774   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3775 
3776   if (call ==  MAT_REUSE_MATRIX) {
3777     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3778     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3779     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3780   } else {
3781     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3782   }
3783 
3784   /*
3785       m - number of local rows
3786       n - number of columns (same on all processors)
3787       rstart - first row in new global matrix generated
3788   */
3789   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3790   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3791   if (call == MAT_INITIAL_MATRIX) {
3792     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3793     ii  = aij->i;
3794     jj  = aij->j;
3795 
3796     /*
3797         Determine the number of non-zeros in the diagonal and off-diagonal
3798         portions of the matrix in order to do correct preallocation
3799     */
3800 
3801     /* first get start and end of "diagonal" columns */
3802     if (csize == PETSC_DECIDE) {
3803       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3804       if (mglobal == n) { /* square matrix */
3805         nlocal = m;
3806       } else {
3807         nlocal = n/size + ((n % size) > rank);
3808       }
3809     } else {
3810       nlocal = csize;
3811     }
3812     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3813     rstart = rend - nlocal;
3814     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3815 
3816     /* next, compute all the lengths */
3817     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3818     olens = dlens + m;
3819     for (i=0; i<m; i++) {
3820       jend = ii[i+1] - ii[i];
3821       olen = 0;
3822       dlen = 0;
3823       for (j=0; j<jend; j++) {
3824         if (*jj < rstart || *jj >= rend) olen++;
3825         else dlen++;
3826         jj++;
3827       }
3828       olens[i] = olen;
3829       dlens[i] = dlen;
3830     }
3831     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3832     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3833     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3834     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3835     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3836     ierr = PetscFree(dlens);CHKERRQ(ierr);
3837   } else {
3838     PetscInt ml,nl;
3839 
3840     M    = *newmat;
3841     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3842     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3843     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3844     /*
3845          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3846        rather than the slower MatSetValues().
3847     */
3848     M->was_assembled = PETSC_TRUE;
3849     M->assembled     = PETSC_FALSE;
3850   }
3851   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3852   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3853   ii   = aij->i;
3854   jj   = aij->j;
3855   aa   = aij->a;
3856   for (i=0; i<m; i++) {
3857     row   = rstart + i;
3858     nz    = ii[i+1] - ii[i];
3859     cwork = jj;     jj += nz;
3860     vwork = aa;     aa += nz;
3861     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3862   }
3863 
3864   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866   *newmat = M;
3867 
  /* save the submatrix to be used by a later MAT_REUSE_MATRIX request */
3869   if (call ==  MAT_INITIAL_MATRIX) {
3870     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3871     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3872   }
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3877 {
3878   PetscInt       m,cstart, cend,j,nnz,i,d;
3879   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3880   const PetscInt *JJ;
3881   PetscScalar    *values;
3882   PetscErrorCode ierr;
3883   PetscBool      nooffprocentries;
3884 
3885   PetscFunctionBegin;
  if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3887 
3888   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3889   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3890   m      = B->rmap->n;
3891   cstart = B->cmap->rstart;
3892   cend   = B->cmap->rend;
3893   rstart = B->rmap->rstart;
3894 
3895   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3896 
3897 #if defined(PETSC_USE_DEBUG)
3898   for (i=0; i<m; i++) {
3899     nnz = Ii[i+1]- Ii[i];
3900     JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3903     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3904   }
3905 #endif
3906 
3907   for (i=0; i<m; i++) {
3908     nnz     = Ii[i+1]- Ii[i];
3909     JJ      = J + Ii[i];
3910     nnz_max = PetscMax(nnz_max,nnz);
3911     d       = 0;
3912     for (j=0; j<nnz; j++) {
3913       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3914     }
3915     d_nnz[i] = d;
3916     o_nnz[i] = nnz - d;
3917   }
3918   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3919   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3920 
3921   if (v) values = (PetscScalar*)v;
3922   else {
3923     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3924   }
3925 
3926   for (i=0; i<m; i++) {
3927     ii   = i + rstart;
3928     nnz  = Ii[i+1]- Ii[i];
3929     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3930   }
3931   nooffprocentries    = B->nooffprocentries;
3932   B->nooffprocentries = PETSC_TRUE;
3933   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935   B->nooffprocentries = nooffprocentries;
3936 
3937   if (!v) {
3938     ierr = PetscFree(values);CHKERRQ(ierr);
3939   }
3940   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3941   PetscFunctionReturn(0);
3942 }
3943 
3944 /*@
3945    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3946    (the default parallel PETSc format).
3947 
3948    Collective on MPI_Comm
3949 
3950    Input Parameters:
3951 +  B - the matrix
3952 .  i - the indices into j for the start of each local row (starts with zero)
3953 .  j - the column indices for each local row (starts with zero)
3954 -  v - optional values in the matrix
3955 
3956    Level: developer
3957 
3958    Notes:
3959        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3960      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3961      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3962 
       The i and j indices are 0 based; the entries of i are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown:
3968 
3969 $        1 0 0
3970 $        2 0 3     P0
3971 $       -------
3972 $        4 5 6     P1
3973 $
3974 $     Process0 [P0]: rows_owned=[0,1]
3975 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3976 $        j =  {0,0,2}  [size = 3]
3977 $        v =  {1,2,3}  [size = 3]
3978 $
3979 $     Process1 [P1]: rows_owned=[2]
3980 $        i =  {0,3}    [size = nrow+1  = 1+1]
3981 $        j =  {0,1,2}  [size = 3]
3982 $        v =  {4,5,6}  [size = 3]
3983 
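       A minimal calling sketch for process 0 of the example above (assuming B has
    already been created as a 2 x PETSC_DECIDE MATMPIAIJ matrix on this communicator):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
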
3984 .keywords: matrix, aij, compressed row, sparse, parallel
3985 
3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3987           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3988 @*/
3989 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3990 {
3991   PetscErrorCode ierr;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3995   PetscFunctionReturn(0);
3996 }
3997 
3998 /*@C
3999    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4000    (the default parallel PETSc format).  For good matrix assembly performance
4001    the user should preallocate the matrix storage by setting the parameters
4002    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4003    performance can be increased by more than a factor of 50.
4004 
4005    Collective on MPI_Comm
4006 
4007    Input Parameters:
4008 +  B - the matrix
4009 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4010            (same value is used for all local rows)
4011 .  d_nnz - array containing the number of nonzeros in the various rows of the
4012            DIAGONAL portion of the local submatrix (possibly different for each row)
4013            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4014            The size of this array is equal to the number of local rows, i.e 'm'.
4015            For matrices that will be factored, you must leave room for (and set)
4016            the diagonal entry even if it is zero.
4017 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4018            submatrix (same value is used for all local rows).
4019 -  o_nnz - array containing the number of nonzeros in the various rows of the
4020            OFF-DIAGONAL portion of the local submatrix (possibly different for
4021            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4022            structure. The size of this array is equal to the number
4023            of local rows, i.e 'm'.
4024 
4025    If the *_nnz parameter is given then the *_nz parameter is ignored
4026 
4027    The AIJ format (also called the Yale sparse matrix format or
4028    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4029    storage.  The stored row and column indices begin with zero.
4030    See Users-Manual: ch_mat for details.
4031 
4032    The parallel matrix is partitioned such that the first m0 rows belong to
4033    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4035 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4045 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4047 
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
4052 
4053    Example usage:
4054 
4055    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4057    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4058    as follows:
4059 
4060 .vb
4061             1  2  0  |  0  3  0  |  0  4
4062     Proc0   0  5  6  |  7  0  0  |  8  0
4063             9  0 10  | 11  0  0  | 12  0
4064     -------------------------------------
4065            13  0 14  | 15 16 17  |  0  0
4066     Proc1   0 18  0  | 19 20 21  |  0  0
4067             0  0  0  | 22 23  0  | 24  0
4068     -------------------------------------
4069     Proc2  25 26 27  |  0  0 28  | 29  0
4070            30  0  0  | 31 32 33  |  0 34
4071 .ve
4072 
4073    This can be represented as a collection of submatrices as:
4074 
4075 .vb
4076       A B C
4077       D E F
4078       G H I
4079 .ve
4080 
4081    Where the submatrices A,B,C are owned by proc0, D,E,F are
4082    owned by proc1, G,H,I are owned by proc2.
4083 
4084    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4085    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4086    The 'M','N' parameters are 8,8, and have the same values on all procs.
4087 
4088    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4089    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4090    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4094 
4095    When d_nz, o_nz parameters are specified, d_nz storage elements are
4096    allocated for every row of the local diagonal submatrix, and o_nz
4097    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4100    In this case, the values of d_nz,o_nz are:
4101 .vb
4102      proc0 : dnz = 2, o_nz = 2
4103      proc1 : dnz = 3, o_nz = 2
4104      proc2 : dnz = 1, o_nz = 4
4105 .ve
4106    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4107    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4109    34 values.
4110 
4111    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4113    In the above case the values for d_nnz,o_nnz are:
4114 .vb
4115      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4116      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4117      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4118 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
4120    hence pre-allocation is perfect.
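
   For instance, proc0 in the example above could preallocate with the sketch below
   (assuming A already has type MATMPIAIJ and local size 3 x 3; the arrays are the
   per-row counts listed above):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve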
4121 
4122    Level: intermediate
4123 
4124 .keywords: matrix, aij, compressed row, sparse, parallel
4125 
4126 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4127           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4128 @*/
4129 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4130 {
4131   PetscErrorCode ierr;
4132 
4133   PetscFunctionBegin;
4134   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4135   PetscValidType(B,1);
4136   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4137   PetscFunctionReturn(0);
4138 }
4139 
4140 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
         rows in standard CSR format.
4143 
4144    Collective on MPI_Comm
4145 
4146    Input Parameters:
4147 +  comm - MPI communicator
4148 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4154 .   i - row indices
4155 .   j - column indices
4156 -   a - matrix values
4157 
4158    Output Parameter:
4159 .   mat - the matrix
4160 
4161    Level: intermediate
4162 
4163    Notes:
4164        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4165      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4166      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4167 
       The i and j indices are 0 based; the entries of i are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown:
4173 
4174 $        1 0 0
4175 $        2 0 3     P0
4176 $       -------
4177 $        4 5 6     P1
4178 $
4179 $     Process0 [P0]: rows_owned=[0,1]
4180 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4181 $        j =  {0,0,2}  [size = 3]
4182 $        v =  {1,2,3}  [size = 3]
4183 $
4184 $     Process1 [P1]: rows_owned=[2]
4185 $        i =  {0,3}    [size = nrow+1  = 1+1]
4186 $        j =  {0,1,2}  [size = 3]
4187 $        v =  {4,5,6}  [size = 3]
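
       Thus process 0 in the example above could create its part with the sketch
    below (using the arrays listed above):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve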
4188 
4189 .keywords: matrix, aij, compressed row, sparse, parallel
4190 
4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4192           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4193 @*/
4194 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4195 {
4196   PetscErrorCode ierr;
4197 
4198   PetscFunctionBegin;
4199   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4200   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4201   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4202   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4203   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4204   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4205   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4206   PetscFunctionReturn(0);
4207 }
4208 
4209 /*@C
4210    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4211    (the default parallel PETSc format).  For good matrix assembly performance
4212    the user should preallocate the matrix storage by setting the parameters
4213    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4214    performance can be increased by more than a factor of 50.
4215 
4216    Collective on MPI_Comm
4217 
4218    Input Parameters:
4219 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4228 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4229            (same value is used for all local rows)
4230 .  d_nnz - array containing the number of nonzeros in the various rows of the
4231            DIAGONAL portion of the local submatrix (possibly different for each row)
4232            or NULL, if d_nz is used to specify the nonzero structure.
4233            The size of this array is equal to the number of local rows, i.e 'm'.
4234 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4235            submatrix (same value is used for all local rows).
4236 -  o_nnz - array containing the number of nonzeros in the various rows of the
4237            OFF-DIAGONAL portion of the local submatrix (possibly different for
4238            each row) or NULL, if o_nz is used to specify the nonzero
4239            structure. The size of this array is equal to the number
4240            of local rows, i.e 'm'.
4241 
4242    Output Parameter:
4243 .  A - the matrix
4244 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4248 
4249    Notes:
4250    If the *_nnz parameter is given then the *_nz parameter is ignored
4251 
4252    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4253    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4254    storage requirements for this matrix.
4255 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4258    that argument.
4259 
4260    The user MUST specify either the local or global matrix dimensions
4261    (possibly both).
4262 
4263    The parallel matrix is partitioned across processors such that the
4264    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
4268 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4272 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned by
   the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4280 
4281    For a square global matrix we define each processor's diagonal portion
4282    to be its local rows and the corresponding columns (a square submatrix);
4283    each processor's off-diagonal portion encompasses the remainder of the
4284    local matrix (a rectangular submatrix).
4285 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4287 
4288    When calling this routine with a single process communicator, a matrix of
4289    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4290    type of communicator, use the construction mechanism
4291 .vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A,m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
4293 .ve
4299 
4300    By default, this format uses inodes (identical nodes) when possible.
4301    We search for consecutive rows with the same nonzero structure, thereby
4302    reusing matrix information to achieve increased efficiency.
4303 
4304    Options Database Keys:
4305 +  -mat_no_inode  - Do not use inodes
4306 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4307 
4310    Example usage:
4311 
4312    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:
4316 
4317 .vb
4318             1  2  0  |  0  3  0  |  0  4
4319     Proc0   0  5  6  |  7  0  0  |  8  0
4320             9  0 10  | 11  0  0  | 12  0
4321     -------------------------------------
4322            13  0 14  | 15 16 17  |  0  0
4323     Proc1   0 18  0  | 19 20 21  |  0  0
4324             0  0  0  | 22 23  0  | 24  0
4325     -------------------------------------
4326     Proc2  25 26 27  |  0  0 28  | 29  0
4327            30  0  0  | 31 32 33  |  0 34
4328 .ve
4329 
   This can be represented as a collection of submatrices as:
4331 
4332 .vb
4333       A B C
4334       D E F
4335       G H I
4336 .ve
4337 
4338    Where the submatrices A,B,C are owned by proc0, D,E,F are
4339    owned by proc1, G,H,I are owned by proc2.
4340 
4341    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4342    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4343    The 'M','N' parameters are 8,8, and have the same values on all procs.
4344 
4345    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4346    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4347    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4351 
4352    When d_nz, o_nz parameters are specified, d_nz storage elements are
4353    allocated for every row of the local diagonal submatrix, and o_nz
4354    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
4358 .vb
4359      proc0 : dnz = 2, o_nz = 2
4360      proc1 : dnz = 3, o_nz = 2
4361      proc2 : dnz = 1, o_nz = 4
4362 .ve
4363    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4364    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4366    34 values.
4367 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
4371 .vb
4372      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4373      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4374      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4375 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
4377    hence pre-allocation is perfect.
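
   Putting the example together, proc0 could create its part of the matrix in one
   call with the sketch below (d_nnz and o_nnz are the per-row counts listed above):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve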
4378 
4379    Level: intermediate
4380 
4381 .keywords: matrix, aij, compressed row, sparse, parallel
4382 
4383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4384           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4385 @*/
4386 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4387 {
4388   PetscErrorCode ierr;
4389   PetscMPIInt    size;
4390 
4391   PetscFunctionBegin;
4392   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4393   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4394   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4395   if (size > 1) {
4396     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4397     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4398   } else {
4399     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4400     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4401   }
4402   PetscFunctionReturn(0);
4403 }
4404 
4405 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4406 {
4407   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4408   PetscBool      flg;
4409   PetscErrorCode ierr;
4410 
4411   PetscFunctionBegin;
4412   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4413   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4414   if (Ad)     *Ad     = a->A;
4415   if (Ao)     *Ao     = a->B;
4416   if (colmap) *colmap = a->garray;
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4421 {
4422   PetscErrorCode ierr;
4423   PetscInt       m,N,i,rstart,nnz,Ii;
4424   PetscInt       *indx;
4425   PetscScalar    *values;
4426 
4427   PetscFunctionBegin;
4428   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4429   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4430     PetscInt       *dnz,*onz,sum,bs,cbs;
4431 
4432     if (n == PETSC_DECIDE) {
4433       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4434     }
4435     /* Check sum(n) = N */
4436     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4437     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4438 
4439     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4440     rstart -= m;
4441 
4442     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4443     for (i=0; i<m; i++) {
4444       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4445       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4446       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4447     }
4448 
4449     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4450     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4451     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4452     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4453     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4454     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4455     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4456     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4457   }
4458 
4459   /* numeric phase */
4460   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4461   for (i=0; i<m; i++) {
4462     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4463     Ii   = i + rstart;
4464     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4465     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4466   }
4467   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4473 {
4474   PetscErrorCode    ierr;
4475   PetscMPIInt       rank;
4476   PetscInt          m,N,i,rstart,nnz;
4477   size_t            len;
4478   const PetscInt    *indx;
4479   PetscViewer       out;
4480   char              *name;
4481   Mat               B;
4482   const PetscScalar *values;
4483 
4484   PetscFunctionBegin;
4485   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4486   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4487   /* Should this be the type of the diagonal block of A? */
4488   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4489   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4490   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4491   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4492   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4493   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4494   for (i=0; i<m; i++) {
4495     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4496     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4497     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4498   }
4499   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501 
4502   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4503   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4504   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4505   sprintf(name,"%s.%d",outfile,rank);
4506   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4507   ierr = PetscFree(name);CHKERRQ(ierr);
4508   ierr = MatView(B,out);CHKERRQ(ierr);
4509   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4510   ierr = MatDestroy(&B);CHKERRQ(ierr);
4511   PetscFunctionReturn(0);
4512 }
4513 
4514 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4515 {
4516   PetscErrorCode      ierr;
4517   Mat_Merge_SeqsToMPI *merge;
4518   PetscContainer      container;
4519 
4520   PetscFunctionBegin;
4521   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4522   if (container) {
4523     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4536     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4537     ierr = PetscFree(merge);CHKERRQ(ierr);
4538     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4539   }
4540   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4541   PetscFunctionReturn(0);
4542 }
4543 
4544 #include <../src/mat/utils/freespace.h>
4545 #include <petscbt.h>
4546 
4547 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4548 {
4549   PetscErrorCode      ierr;
4550   MPI_Comm            comm;
4551   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4552   PetscMPIInt         size,rank,taga,*len_s;
4553   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4554   PetscInt            proc,m;
4555   PetscInt            **buf_ri,**buf_rj;
4556   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4557   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4558   MPI_Request         *s_waits,*r_waits;
4559   MPI_Status          *status;
4560   MatScalar           *aa=a->a;
4561   MatScalar           **abuf_r,*ba_i;
4562   Mat_Merge_SeqsToMPI *merge;
4563   PetscContainer      container;
4564 
4565   PetscFunctionBegin;
4566   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4567   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4568 
4569   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4570   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4571 
4572   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4573   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4574 
4575   bi     = merge->bi;
4576   bj     = merge->bj;
4577   buf_ri = merge->buf_ri;
4578   buf_rj = merge->buf_rj;
4579 
4580   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4581   owners = merge->rowmap->range;
4582   len_s  = merge->len_s;
4583 
4584   /* send and recv matrix values */
4585   /*-----------------------------*/
4586   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4587   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4588 
4589   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4590   for (proc=0,k=0; proc<size; proc++) {
4591     if (!len_s[proc]) continue;
4592     i    = owners[proc];
4593     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4594     k++;
4595   }
4596 
4597   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4598   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4599   ierr = PetscFree(status);CHKERRQ(ierr);
4600 
4601   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4602   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4603 
4604   /* insert mat values of mpimat */
4605   /*----------------------------*/
4606   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4607   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4608 
4609   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4614   }
4615 
4616   /* set values of ba */
4617   m = merge->rowmap->n;
4618   for (i=0; i<m; i++) {
4619     arow = owners[rank] + i;
4620     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4621     bnzi = bi[i+1] - bi[i];
4622     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4623 
4624     /* add local non-zero vals of this proc's seqmat into ba */
4625     anzi   = ai[arow+1] - ai[arow];
4626     aj     = a->j + ai[arow];
4627     aa     = a->a + ai[arow];
4628     nextaj = 0;
4629     for (j=0; nextaj<anzi; j++) {
4630       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4631         ba_i[j] += aa[nextaj++];
4632       }
4633     }
4634 
4635     /* add received vals into ba */
4636     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4637       /* i-th row */
4638       if (i == *nextrow[k]) {
4639         anzi   = *(nextai[k]+1) - *nextai[k];
4640         aj     = buf_rj[k] + *(nextai[k]);
4641         aa     = abuf_r[k] + *(nextai[k]);
4642         nextaj = 0;
4643         for (j=0; nextaj<anzi; j++) {
4644           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4645             ba_i[j] += aa[nextaj++];
4646           }
4647         }
4648         nextrow[k]++; nextai[k]++;
4649       }
4650     }
4651     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4652   }
4653   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4654   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4655 
4656   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4657   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4658   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4659   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4660   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
4663 
4664 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4665 {
4666   PetscErrorCode      ierr;
4667   Mat                 B_mpi;
4668   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4669   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4670   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4671   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4672   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4673   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4674   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4675   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4676   MPI_Status          *status;
4677   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4678   PetscBT             lnkbt;
4679   Mat_Merge_SeqsToMPI *merge;
4680   PetscContainer      container;
4681 
4682   PetscFunctionBegin;
4683   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4684 
4685   /* make sure it is a PETSc comm */
4686   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4688   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4689 
4690   ierr = PetscNew(&merge);CHKERRQ(ierr);
4691   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4692 
4693   /* determine row ownership */
4694   /*---------------------------------------------------------*/
4695   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4696   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4697   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4698   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4699   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4700   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4701   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4702 
4703   m      = merge->rowmap->n;
4704   owners = merge->rowmap->range;
4705 
4706   /* determine the number of messages to send, their lengths */
4707   /*---------------------------------------------------------*/
4708   len_s = merge->len_s;
4709 
4710   len          = 0; /* length of buf_si[] */
4711   merge->nsend = 0;
4712   for (proc=0; proc<size; proc++) {
4713     len_si[proc] = 0;
4714     if (proc == rank) {
4715       len_s[proc] = 0;
4716     } else {
4717       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4718       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4719     }
4720     if (len_s[proc]) {
4721       merge->nsend++;
4722       nrows = 0;
4723       for (i=owners[proc]; i<owners[proc+1]; i++) {
4724         if (ai[i+1] > ai[i]) nrows++;
4725       }
4726       len_si[proc] = 2*(nrows+1);
4727       len         += len_si[proc];
4728     }
4729   }
4730 
4731   /* determine the number and length of messages to receive for ij-structure */
4732   /*-------------------------------------------------------------------------*/
4733   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4734   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4735 
4736   /* post the Irecv of j-structure */
4737   /*-------------------------------*/
4738   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4739   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4740 
4741   /* post the Isend of j-structure */
4742   /*--------------------------------*/
4743   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4744 
4745   for (proc=0, k=0; proc<size; proc++) {
4746     if (!len_s[proc]) continue;
4747     i    = owners[proc];
4748     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4749     k++;
4750   }
4751 
4752   /* receives and sends of j-structure are complete */
4753   /*------------------------------------------------*/
4754   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4755   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4756 
4757   /* send and recv i-structure */
4758   /*---------------------------*/
4759   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4760   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4761 
4762   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4763   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4764   for (proc=0,k=0; proc<size; proc++) {
4765     if (!len_s[proc]) continue;
4766     /* form outgoing message for i-structure:
4767          buf_si[0]:                 nrows to be sent
              [1:nrows]:           row index (local to the destination process)
4769                [nrows+1:2*nrows+1]: i-structure index
4770     */
4771     /*-------------------------------------------*/
4772     nrows       = len_si[proc]/2 - 1;
4773     buf_si_i    = buf_si + nrows+1;
4774     buf_si[0]   = nrows;
4775     buf_si_i[0] = 0;
4776     nrows       = 0;
4777     for (i=owners[proc]; i<owners[proc+1]; i++) {
4778       anzi = ai[i+1] - ai[i];
4779       if (anzi) {
4780         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4781         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4782         nrows++;
4783       }
4784     }
4785     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4786     k++;
4787     buf_si += len_si[proc];
4788   }
4789 
4790   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4791   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4792 
4793   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4794   for (i=0; i<merge->nrecv; i++) {
4795     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4796   }
4797 
4798   ierr = PetscFree(len_si);CHKERRQ(ierr);
4799   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4800   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4801   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4802   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4803   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4804   ierr = PetscFree(status);CHKERRQ(ierr);
4805 
4806   /* compute a local seq matrix in each processor */
4807   /*----------------------------------------------*/
4808   /* allocate bi array and free space for accumulating nonzero column info */
4809   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4810   bi[0] = 0;
4811 
4812   /* create and initialize a linked list */
4813   nlnk = N+1;
4814   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4815 
4816   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4817   len  = ai[owners[rank+1]] - ai[owners[rank]];
4818   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4819 
4820   current_space = free_space;
4821 
4822   /* determine symbolic info for each local row */
4823   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4824 
4825   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4830   }
4831 
4832   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4833   len  = 0;
4834   for (i=0; i<m; i++) {
4835     bnzi = 0;
4836     /* add local non-zero cols of this proc's seqmat into lnk */
4837     arow  = owners[rank] + i;
4838     anzi  = ai[arow+1] - ai[arow];
4839     aj    = a->j + ai[arow];
4840     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4841     bnzi += nlnk;
4842     /* add received col data into lnk */
4843     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4844       if (i == *nextrow[k]) { /* i-th row */
4845         anzi  = *(nextai[k]+1) - *nextai[k];
4846         aj    = buf_rj[k] + *nextai[k];
4847         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4848         bnzi += nlnk;
4849         nextrow[k]++; nextai[k]++;
4850       }
4851     }
4852     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4853 
4854     /* if free space is not available, make more free space */
4855     if (current_space->local_remaining<bnzi) {
4856       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4857       nspacedouble++;
4858     }
4859     /* copy data into free space, then initialize lnk */
4860     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4861     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4862 
4863     current_space->array           += bnzi;
4864     current_space->local_used      += bnzi;
4865     current_space->local_remaining -= bnzi;
4866 
4867     bi[i+1] = bi[i] + bnzi;
4868   }
4869 
4870   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4871 
4872   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4873   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4874   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4875 
4876   /* create symbolic parallel matrix B_mpi */
4877   /*---------------------------------------*/
4878   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4879   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4880   if (n==PETSC_DECIDE) {
4881     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4882   } else {
4883     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4884   }
4885   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4886   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4887   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4888   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4889   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4890 
4891   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4892   B_mpi->assembled    = PETSC_FALSE;
4893   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4894   merge->bi           = bi;
4895   merge->bj           = bj;
4896   merge->buf_ri       = buf_ri;
4897   merge->buf_rj       = buf_rj;
4898   merge->coi          = NULL;
4899   merge->coj          = NULL;
4900   merge->owners_co    = NULL;
4901 
4902   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4903 
4904   /* attach the supporting struct to B_mpi for reuse */
4905   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4906   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4907   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4908   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4909   *mpimat = B_mpi;
4910 
4911   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4912   PetscFunctionReturn(0);
4913 }
4914 
4915 /*@C
4916       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4917                  matrices from each processor
4918 
4919     Collective on MPI_Comm
4920 
4921    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4924 .    m - number of local rows (or PETSC_DECIDE)
4925 .    n - number of local columns (or PETSC_DECIDE)
4926 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 
4928    Output Parameter:
4929 .    mpimat - the parallel matrix generated
4930 
4931     Level: advanced
4932 
4933    Notes:
4934      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "MatMergeSeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
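
   A typical call pattern is sketched below (with MAT_REUSE_MATRIX the nonzero
   pattern of seqmat is assumed unchanged; only its numerical values may differ):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the values of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve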
4937 @*/
4938 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4939 {
4940   PetscErrorCode ierr;
4941   PetscMPIInt    size;
4942 
4943   PetscFunctionBegin;
4944   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4945   if (size == 1) {
4946     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4947     if (scall == MAT_INITIAL_MATRIX) {
4948       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4949     } else {
4950       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4951     }
4952     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4953     PetscFunctionReturn(0);
4954   }
4955   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4956   if (scall == MAT_INITIAL_MATRIX) {
4957     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4958   }
4959   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4960   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4961   PetscFunctionReturn(0);
4962 }
4963 
4964 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4968 
4969     Not Collective
4970 
4971    Input Parameters:
4972 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4974 
4975    Output Parameter:
4976 .    A_loc - the local sequential matrix generated
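
    A typical usage is sketched below (with MAT_INITIAL_MATRIX the caller is assumed
    to own, and eventually destroy, A_loc):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc; later, after the values of A change ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve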
4977 
4978     Level: developer
4979 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4981 
4982 @*/
4983 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4984 {
4985   PetscErrorCode ierr;
4986   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4987   Mat_SeqAIJ     *mat,*a,*b;
4988   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4989   MatScalar      *aa,*ba,*cam;
4990   PetscScalar    *ca;
4991   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4992   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4993   PetscBool      match;
4994   MPI_Comm       comm;
4995   PetscMPIInt    size;
4996 
4997   PetscFunctionBegin;
4998   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4999   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5000   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5002   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5003 
5004   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5005   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5006   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5007   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5008   aa = a->a; ba = b->a;
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     if (size == 1) {
5011       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5012       PetscFunctionReturn(0);
5013     }
5014 
5015     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5016     ci[0] = 0;
5017     for (i=0; i<am; i++) {
5018       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5019     }
5020     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5021     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5022     k    = 0;
5023     for (i=0; i<am; i++) {
5024       ncols_o = bi[i+1] - bi[i];
5025       ncols_d = ai[i+1] - ai[i];
5026       /* off-diagonal portion of A */
5027       for (jo=0; jo<ncols_o; jo++) {
5028         col = cmap[*bj];
5029         if (col >= cstart) break;
5030         cj[k]   = col; bj++;
5031         ca[k++] = *ba++;
5032       }
5033       /* diagonal portion of A */
5034       for (j=0; j<ncols_d; j++) {
5035         cj[k]   = cstart + *aj++;
5036         ca[k++] = *aa++;
5037       }
5038       /* off-diagonal portion of A */
5039       for (j=jo; j<ncols_o; j++) {
5040         cj[k]   = cmap[*bj++];
5041         ca[k++] = *ba++;
5042       }
5043     }
5044     /* put together the new matrix */
5045     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5046     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5047     /* Since these are PETSc arrays, change flags to free them as necessary. */
5048     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5049     mat->free_a  = PETSC_TRUE;
5050     mat->free_ij = PETSC_TRUE;
5051     mat->nonew   = 0;
5052   } else if (scall == MAT_REUSE_MATRIX) {
5053     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5054     ci = mat->i; cj = mat->j; cam = mat->a;
5055     for (i=0; i<am; i++) {
5056       /* off-diagonal portion of A */
5057       ncols_o = bi[i+1] - bi[i];
5058       for (jo=0; jo<ncols_o; jo++) {
5059         col = cmap[*bj];
5060         if (col >= cstart) break;
5061         *cam++ = *ba++; bj++;
5062       }
5063       /* diagonal portion of A */
5064       ncols_d = ai[i+1] - ai[i];
5065       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5066       /* off-diagonal portion of A */
5067       for (j=jo; j<ncols_o; j++) {
5068         *cam++ = *ba++; bj++;
5069       }
5070     }
5071   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5072   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5073   PetscFunctionReturn(0);
5074 }
5075 
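/*
   Example usage of MatMPIAIJGetLocalMat(), a minimal sketch (not from the PETSc documentation),
   assuming A is an assembled MATMPIAIJ matrix:

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
         ... use A_loc as an ordinary sequential AIJ matrix ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call refills the existing A_loc with the current values of A and is only
   valid when the nonzero pattern of A has not changed.
*/
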
5076 /*@C
5077      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5078 
5079     Not Collective
5080 
5081    Input Parameters:
5082 +    A - the matrix
5083 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084 -    row, col - index sets of rows and columns to extract (or NULL)
5085 
5086    Output Parameter:
5087 .    A_loc - the local sequential matrix generated
5088 
5089     Level: developer
5090 
5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5092 
5093 @*/
5094 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5095 {
5096   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5097   PetscErrorCode ierr;
5098   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5099   IS             isrowa,iscola;
5100   Mat            *aloc;
5101   PetscBool      match;
5102 
5103   PetscFunctionBegin;
5104   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5105   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5106   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5107   if (!row) {
5108     start = A->rmap->rstart; end = A->rmap->rend;
5109     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5110   } else {
5111     isrowa = *row;
5112   }
5113   if (!col) {
5114     start = A->cmap->rstart;
5115     cmap  = a->garray;
5116     nzA   = a->A->cmap->n;
5117     nzB   = a->B->cmap->n;
5118     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5119     ncols = 0;
5120     for (i=0; i<nzB; i++) {
5121       if (cmap[i] < start) idx[ncols++] = cmap[i];
5122       else break;
5123     }
5124     imark = i;
5125     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5126     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5127     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5128   } else {
5129     iscola = *col;
5130   }
5131   if (scall != MAT_INITIAL_MATRIX) {
5132     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5133     aloc[0] = *A_loc;
5134   }
5135   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5136   if (!col) { /* attach global id of condensed columns */
5137     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5138   }
5139   *A_loc = aloc[0];
5140   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5141   if (!row) {
5142     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5143   }
5144   if (!col) {
5145     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5146   }
5147   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
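/*
   Example usage of MatMPIAIJGetLocalMatCondensed(), a minimal sketch (not from the PETSc
   documentation), passing NULL so the routine selects the nonzero columns itself:

      Mat A_loc;
      IS  iscol;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ierr = PetscObjectQuery((PetscObject)A_loc,"_petsc_GetLocalMatCondensed_iscol",(PetscObject*)&iscol);CHKERRQ(ierr);
         ... iscol maps the condensed column indices of A_loc back to global columns of A ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/
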
5151 /*@C
5152     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5153 
5154     Collective on Mat
5155 
5156    Input Parameters:
5157 +    A,B - the matrices in mpiaij format
5158 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5159 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5160 
5161    Output Parameters:
5162 +    rowb, colb - index sets of rows and columns of B to extract
5163 -    B_seq - the sequential matrix generated
5164 
5165     Level: developer
5166 
5167 @*/
5168 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5169 {
5170   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5171   PetscErrorCode ierr;
5172   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5173   IS             isrowb,iscolb;
5174   Mat            *bseq=NULL;
5175 
5176   PetscFunctionBegin;
5177   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5178     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5179   }
5180   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5181 
5182   if (scall == MAT_INITIAL_MATRIX) {
5183     start = A->cmap->rstart;
5184     cmap  = a->garray;
5185     nzA   = a->A->cmap->n;
5186     nzB   = a->B->cmap->n;
5187     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5188     ncols = 0;
5189     for (i=0; i<nzB; i++) {  /* rows of B below this process's row range */
5190       if (cmap[i] < start) idx[ncols++] = cmap[i];
5191       else break;
5192     }
5193     imark = i;
5194     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5195     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above this process's row range */
5196     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5197     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5198   } else {
5199     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5200     isrowb  = *rowb; iscolb = *colb;
5201     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5202     bseq[0] = *B_seq;
5203   }
5204   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5205   *B_seq = bseq[0];
5206   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5207   if (!rowb) {
5208     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5209   } else {
5210     *rowb = isrowb;
5211   }
5212   if (!colb) {
5213     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5214   } else {
5215     *colb = iscolb;
5216   }
5217   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5218   PetscFunctionReturn(0);
5219 }
5220 
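/*
   Example usage of MatGetBrowsOfAcols(), a minimal sketch (not from the PETSc documentation).
   Passing the addresses of NULL index sets on the first call keeps them for later reuse:

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
         ... after the values of B change (same nonzero pattern) ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/
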
5221 /*
5222     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5223     of the OFF-DIAGONAL portion of local A
5224 
5225     Collective on Mat
5226 
5227    Input Parameters:
5228 +    A,B - the matrices in mpiaij format
5229 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5230 
5231    Output Parameters:
5232 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5233 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5234 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5235 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5236 
5237     Level: developer
5238 
5239 */
5240 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5241 {
5242   VecScatter_MPI_General *gen_to,*gen_from;
5243   PetscErrorCode         ierr;
5244   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5245   Mat_SeqAIJ             *b_oth;
5246   VecScatter             ctx;
5247   MPI_Comm               comm;
5248   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5249   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5250   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5251   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5252   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5253   MPI_Request            *rwaits = NULL,*swaits = NULL;
5254   MPI_Status             *sstatus,rstatus;
5255   PetscMPIInt            jj,size;
5256   VecScatterType         type;
5257   PetscBool              mpi1;
5258 
5259   PetscFunctionBegin;
5260   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5261   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5262 
5263   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5264     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5265   }
5266   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5267   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5268 
5269   if (size == 1) {
5270     if (startsj_s) *startsj_s = NULL;
5271     if (bufa_ptr)  *bufa_ptr  = NULL;
5272     *B_oth    = NULL;
5273     PetscFunctionReturn(0);
5274   }
5275 
5276   ctx = a->Mvctx;
5277   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5278   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5279   if (!mpi1) {
5280     /* a->Mvctx is not of type MPI1, the only VecScatter type implemented for these Mat-Mat ops,
5281      so create a->Mvctx_mpi1 */
5282     if (!a->Mvctx_mpi1) {
5283       a->Mvctx_mpi1_flg = PETSC_TRUE;
5284       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5285     }
5286     ctx = a->Mvctx_mpi1;
5287   }
5288   tag = ((PetscObject)ctx)->tag;
5289 
5290   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5291   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5292   nrecvs   = gen_from->n;
5293   nsends   = gen_to->n;
5294 
5295   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5296   srow    = gen_to->indices;    /* local row index to be sent */
5297   sstarts = gen_to->starts;
5298   sprocs  = gen_to->procs;
5299   sstatus = gen_to->sstatus;
5300   sbs     = gen_to->bs;
5301   rstarts = gen_from->starts;
5302   rprocs  = gen_from->procs;
5303   rbs     = gen_from->bs;
5304 
5305   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5306   if (scall == MAT_INITIAL_MATRIX) {
5307     /* i-array */
5308     /*---------*/
5309     /*  post receives */
5310     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5311     for (i=0; i<nrecvs; i++) {
5312       rowlen = rvalues + rstarts[i]*rbs;
5313       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5314       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5315     }
5316 
5317     /* pack the outgoing message */
5318     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5319 
5320     sstartsj[0] = 0;
5321     rstartsj[0] = 0;
5322     len         = 0; /* total length of j or a array to be sent */
5323     k           = 0;
5324     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5325     for (i=0; i<nsends; i++) {
5326       rowlen = svalues + sstarts[i]*sbs;
5327       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5328       for (j=0; j<nrows; j++) {
5329         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5330         for (l=0; l<sbs; l++) {
5331           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5332 
5333           rowlen[j*sbs+l] = ncols;
5334 
5335           len += ncols;
5336           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5337         }
5338         k++;
5339       }
5340       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5341 
5342       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5343     }
5344     /* recvs and sends of i-array are completed */
5345     i = nrecvs;
5346     while (i--) {
5347       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5348     }
5349     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5350     ierr = PetscFree(svalues);CHKERRQ(ierr);
5351 
5352     /* allocate buffers for sending j and a arrays */
5353     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5354     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5355 
5356     /* create i-array of B_oth */
5357     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5358 
5359     b_othi[0] = 0;
5360     len       = 0; /* total length of j or a array to be received */
5361     k         = 0;
5362     for (i=0; i<nrecvs; i++) {
5363       rowlen = rvalues + rstarts[i]*rbs;
5364       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5365       for (j=0; j<nrows; j++) {
5366         b_othi[k+1] = b_othi[k] + rowlen[j];
5367         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5368         k++;
5369       }
5370       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5371     }
5372     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5373 
5374     /* allocate space for j and a arrays of B_oth */
5375     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5376     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5377 
5378     /* j-array */
5379     /*---------*/
5380     /*  post receives of j-array */
5381     for (i=0; i<nrecvs; i++) {
5382       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5383       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5384     }
5385 
5386     /* pack the outgoing message j-array */
5387     k = 0;
5388     for (i=0; i<nsends; i++) {
5389       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5390       bufJ  = bufj+sstartsj[i];
5391       for (j=0; j<nrows; j++) {
5392         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5393         for (ll=0; ll<sbs; ll++) {
5394           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5395           for (l=0; l<ncols; l++) {
5396             *bufJ++ = cols[l];
5397           }
5398           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5399         }
5400       }
5401       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5402     }
5403 
5404     /* recvs and sends of j-array are completed */
5405     i = nrecvs;
5406     while (i--) {
5407       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5408     }
5409     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5410   } else if (scall == MAT_REUSE_MATRIX) {
5411     sstartsj = *startsj_s;
5412     rstartsj = *startsj_r;
5413     bufa     = *bufa_ptr;
5414     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5415     b_otha   = b_oth->a;
5416   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5417 
5418   /* a-array */
5419   /*---------*/
5420   /*  post receives of a-array */
5421   for (i=0; i<nrecvs; i++) {
5422     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5423     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5424   }
5425 
5426   /* pack the outgoing message a-array */
5427   k = 0;
5428   for (i=0; i<nsends; i++) {
5429     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5430     bufA  = bufa+sstartsj[i];
5431     for (j=0; j<nrows; j++) {
5432       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5433       for (ll=0; ll<sbs; ll++) {
5434         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5435         for (l=0; l<ncols; l++) {
5436           *bufA++ = vals[l];
5437         }
5438         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5439       }
5440     }
5441     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5442   }
5443   /* recvs and sends of a-array are completed */
5444   i = nrecvs;
5445   while (i--) {
5446     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5447   }
5448   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5449   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5450 
5451   if (scall == MAT_INITIAL_MATRIX) {
5452     /* put together the new matrix */
5453     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5454 
5455     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5456     /* Since these are PETSc arrays, change flags to free them as necessary. */
5457     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5458     b_oth->free_a  = PETSC_TRUE;
5459     b_oth->free_ij = PETSC_TRUE;
5460     b_oth->nonew   = 0;
5461 
5462     ierr = PetscFree(bufj);CHKERRQ(ierr);
5463     if (!startsj_s || !bufa_ptr) {
5464       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5465       ierr = PetscFree(bufa);CHKERRQ(ierr);
5466     } else {
5467       *startsj_s = sstartsj;
5468       *startsj_r = rstartsj;
5469       *bufa_ptr  = bufa;
5470     }
5471   }
5472   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5473   PetscFunctionReturn(0);
5474 }
5475 
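/*
   Example usage of MatGetBrowsOfAoCols_MPIAIJ(), a minimal sketch (not from the PETSc
   documentation); the starts arrays and the send buffer are kept so a later call with
   MAT_REUSE_MATRIX can skip rebuilding the communication pattern:

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
         ... after the values of B change (same nonzero pattern) ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/
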
5476 /*@C
5477   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5478 
5479   Not Collective
5480 
5481   Input Parameter:
5482 . A - The matrix in mpiaij format
5483 
5484   Output Parameters:
5485 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5486 . colmap - A map from global column index to local index into lvec
5487 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5488 
5489   Level: developer
5490 
5491 @*/
5492 #if defined(PETSC_USE_CTABLE)
5493 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5494 #else
5495 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5496 #endif
5497 {
5498   Mat_MPIAIJ *a;
5499 
5500   PetscFunctionBegin;
5501   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5502   PetscValidPointer(lvec, 2);
5503   PetscValidPointer(colmap, 3);
5504   PetscValidPointer(multScatter, 4);
5505   a = (Mat_MPIAIJ*) A->data;
5506   if (lvec) *lvec = a->lvec;
5507   if (colmap) *colmap = a->colmap;
5508   if (multScatter) *multScatter = a->Mvctx;
5509   PetscFunctionReturn(0);
5510 }
5511 
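/*
   Example usage of MatGetCommunicationStructs(), a minimal sketch (not from the PETSc
   documentation); the returned objects are owned by the matrix and must not be destroyed
   by the caller:

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/
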
5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5514 #if defined(PETSC_HAVE_MKL_SPARSE)
5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5516 #endif
5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5518 #if defined(PETSC_HAVE_ELEMENTAL)
5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_HYPRE)
5522 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5523 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5524 #endif
5525 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5526 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5527 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5528 
5529 /*
5530     Computes (B'*A')', which equals A*B, since computing A*B directly is untenable
5531 
5532                n                       p                          p
5533         (              )       (              )         (                  )
5534       m (      A       )  *  n (       B      )   =   m (         C        )
5535         (              )       (              )         (                  )
5536 
5537 */
5538 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5539 {
5540   PetscErrorCode ierr;
5541   Mat            At,Bt,Ct;
5542 
5543   PetscFunctionBegin;
5544   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5545   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5546   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5547   ierr = MatDestroy(&At);CHKERRQ(ierr);
5548   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5549   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5550   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5551   PetscFunctionReturn(0);
5552 }
5553 
5554 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5555 {
5556   PetscErrorCode ierr;
5557   PetscInt       m=A->rmap->n,n=B->cmap->n;
5558   Mat            Cmat;
5559 
5560   PetscFunctionBegin;
5561   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5562   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5563   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5564   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5565   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5566   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5567   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5568   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5569 
5570   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5571 
5572   *C = Cmat;
5573   PetscFunctionReturn(0);
5574 }
5575 
5576 /* ----------------------------------------------------------------*/
5577 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5578 {
5579   PetscErrorCode ierr;
5580 
5581   PetscFunctionBegin;
5582   if (scall == MAT_INITIAL_MATRIX) {
5583     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5584     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5585     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5586   }
5587   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5588   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5589   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5590   PetscFunctionReturn(0);
5591 }
5592 
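/*
   Example of reaching the MPIDense*MPIAIJ product above through the public API, a minimal
   sketch (not from the PETSc documentation); A is MATMPIDENSE and B is MATMPIAIJ:

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/
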
5593 /*MC
5594    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5595 
5596    Options Database Keys:
5597 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5598 
5599   Level: beginner
5600 
5601 .seealso: MatCreateAIJ()
5602 M*/
5603 
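/*
   Example of creating a MATMPIAIJ matrix, a minimal sketch (not from the PETSc documentation);
   the preallocation counts 5 and 2 are illustrative:

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/
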
5604 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5605 {
5606   Mat_MPIAIJ     *b;
5607   PetscErrorCode ierr;
5608   PetscMPIInt    size;
5609 
5610   PetscFunctionBegin;
5611   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5612 
5613   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5614   B->data       = (void*)b;
5615   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5616   B->assembled  = PETSC_FALSE;
5617   B->insertmode = NOT_SET_VALUES;
5618   b->size       = size;
5619 
5620   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5621 
5622   /* build cache for off array entries formed */
5623   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5624 
5625   b->donotstash  = PETSC_FALSE;
5626   b->colmap      = NULL;
5627   b->garray      = NULL;
5628   b->roworiented = PETSC_TRUE;
5629 
5630   /* stuff used for matrix vector multiply */
5631   b->lvec  = NULL;
5632   b->Mvctx = NULL;
5633 
5634   /* stuff for MatGetRow() */
5635   b->rowindices   = NULL;
5636   b->rowvalues    = NULL;
5637   b->getrowactive = PETSC_FALSE;
5638 
5639   /* flexible pointer used in CUSP/CUSPARSE classes */
5640   b->spptr = NULL;
5641 
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5651 #if defined(PETSC_HAVE_MKL_SPARSE)
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5653 #endif
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5656 #if defined(PETSC_HAVE_ELEMENTAL)
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5658 #endif
5659 #if defined(PETSC_HAVE_HYPRE)
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5661 #endif
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5667 #if defined(PETSC_HAVE_HYPRE)
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5669 #endif
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5672   PetscFunctionReturn(0);
5673 }
5674 
5675 /*@C
5676      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5677          and "off-diagonal" part of the matrix in CSR format.
5678 
5679    Collective on MPI_Comm
5680 
5681    Input Parameters:
5682 +  comm - MPI communicator
5683 .  m - number of local rows (Cannot be PETSC_DECIDE)
5684 .  n - This value should be the same as the local size used in creating the
5685        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5686        calculated if N is given). For square matrices n is almost always m.
5687 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5688 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5689 .   i - row indices for "diagonal" portion of matrix
5690 .   j - column indices
5691 .   a - matrix values
5692 .   oi - row indices for "off-diagonal" portion of matrix
5693 .   oj - column indices
5694 -   oa - matrix values
5695 
5696    Output Parameter:
5697 .   mat - the matrix
5698 
5699    Level: advanced
5700 
5701    Notes:
5702        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5703        must free the arrays once the matrix has been destroyed and not before.
5704 
5705        The i and j indices are 0 based
5706 
5707        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5708 
5709        This sets local rows and cannot be used to set off-processor values.
5710 
5711        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5712        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5713        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5714        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5715        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5716        communication if it is known that only local entries will be set.
5717 
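   Example:
       An illustrative sketch (not from the PETSc documentation). Suppose two processes each own two
       rows and two columns of the 4x4 matrix

$              1 2 0 3
$              0 4 5 0
$              6 0 7 0
$              0 8 0 9

       Process 0 owns rows 0-1 and columns 0-1. Its "diagonal" arrays hold the block of columns 0-1
       with local column indices, and its "off-diagonal" arrays hold the remaining entries with
       global column indices:

$              i  = {0,2,3}   j  = {0,1,1}   a  = {1,2,4}
$              oi = {0,1,2}   oj = {3,2}     oa = {3,5}
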
5718 .keywords: matrix, aij, compressed row, sparse, parallel
5719 
5720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5721           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5722 @*/
5723 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5724 {
5725   PetscErrorCode ierr;
5726   Mat_MPIAIJ     *maij;
5727 
5728   PetscFunctionBegin;
5729   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5730   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5731   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5732   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5733   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5734   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5735   maij = (Mat_MPIAIJ*) (*mat)->data;
5736 
5737   (*mat)->preallocated = PETSC_TRUE;
5738 
5739   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5740   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5741 
5742   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5743   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5744 
5745   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5748   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5749 
5750   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5751   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5754   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5755   PetscFunctionReturn(0);
5756 }
5757 
5758 /*
5759     Special version for direct calls from Fortran
5760 */
5761 #include <petsc/private/fortranimpl.h>
5762 
5763 /* Change these macros so they can be used in a void function */
5764 #undef CHKERRQ
5765 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5766 #undef SETERRQ2
5767 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5768 #undef SETERRQ3
5769 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5770 #undef SETERRQ
5771 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5772 
5773 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5774 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5775 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5776 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5777 #else
5778 #endif
5779 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5780 {
5781   Mat            mat  = *mmat;
5782   PetscInt       m    = *mm, n = *mn;
5783   InsertMode     addv = *maddv;
5784   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5785   PetscScalar    value;
5786   PetscErrorCode ierr;
5787 
5788   MatCheckPreallocated(mat,1);
5789   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5790 
5791 #if defined(PETSC_USE_DEBUG)
5792   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5793 #endif
5794   {
5795     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5796     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5797     PetscBool roworiented = aij->roworiented;
5798 
5799     /* Some Variables required in the macro */
5800     Mat        A                 = aij->A;
5801     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5802     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5803     MatScalar  *aa               = a->a;
5804     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5805     Mat        B                 = aij->B;
5806     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5807     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5808     MatScalar  *ba               = b->a;
5809 
5810     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5811     PetscInt  nonew = a->nonew;
5812     MatScalar *ap1,*ap2;
5813 
5814     PetscFunctionBegin;
5815     for (i=0; i<m; i++) {
5816       if (im[i] < 0) continue;
5817 #if defined(PETSC_USE_DEBUG)
5818       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5819 #endif
5820       if (im[i] >= rstart && im[i] < rend) {
5821         row      = im[i] - rstart;
5822         lastcol1 = -1;
5823         rp1      = aj + ai[row];
5824         ap1      = aa + ai[row];
5825         rmax1    = aimax[row];
5826         nrow1    = ailen[row];
5827         low1     = 0;
5828         high1    = nrow1;
5829         lastcol2 = -1;
5830         rp2      = bj + bi[row];
5831         ap2      = ba + bi[row];
5832         rmax2    = bimax[row];
5833         nrow2    = bilen[row];
5834         low2     = 0;
5835         high2    = nrow2;
5836 
5837         for (j=0; j<n; j++) {
5838           if (roworiented) value = v[i*n+j];
5839           else value = v[i+j*m];
5840           if (in[j] >= cstart && in[j] < cend) {
5841             col = in[j] - cstart;
5842             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5843             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5844           } else if (in[j] < 0) continue;
5845 #if defined(PETSC_USE_DEBUG)
5846           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5847           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5848 #endif
5849           else {
5850             if (mat->was_assembled) {
5851               if (!aij->colmap) {
5852                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5853               }
5854 #if defined(PETSC_USE_CTABLE)
5855               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5856               col--;
5857 #else
5858               col = aij->colmap[in[j]] - 1;
5859 #endif
5860               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5861               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5862                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5863                 col  =  in[j];
5864                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5865                 B     = aij->B;
5866                 b     = (Mat_SeqAIJ*)B->data;
5867                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5868                 rp2   = bj + bi[row];
5869                 ap2   = ba + bi[row];
5870                 rmax2 = bimax[row];
5871                 nrow2 = bilen[row];
5872                 low2  = 0;
5873                 high2 = nrow2;
5874                 bm    = aij->B->rmap->n;
5875                 ba    = b->a;
5876               }
5877             } else col = in[j];
5878             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5879           }
5880         }
5881       } else if (!aij->donotstash) {
5882         if (roworiented) {
5883           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5884         } else {
5885           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5886         }
5887       }
5888     }
5889   }
5890   PetscFunctionReturnVoid();
5891 }
5892 
5893