/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 71fd2e92b2fee25f70818ffc1dca9fc57f8584d7) */
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
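/*
   Editorial sketch (not part of the original source): driving the routine
   above through the public interface.  A is assumed to be an assembled
   MATMPIBAIJ matrix; the idx array receives the GLOBAL column index of each
   row maximum.
*/
#if 0
Mat            A;                 /* assembled MATMPIBAIJ (assumed context) */
Vec            rmax;
PetscInt       m,n,*loc;
PetscErrorCode ierr;

ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&rmax);CHKERRQ(ierr);
ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr); /* per-row |entry| maxima and their columns */
ierr = PetscFree(loc);CHKERRQ(ierr);
ierr = VecDestroy(rmax);CHKERRQ(ierr);
#endif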
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
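/*
   Editorial sketch: the store/retrieve pair above is intended for matrices
   whose nonzero pattern is fixed while the numerical values change
   repeatedly, e.g. across nonlinear iterations.  The surrounding calls are
   an assumed context; MatStoreValues() requires that no new nonzero
   locations be created afterwards.
*/
#if 0
ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
ierr = MatStoreValues(A);CHKERRQ(ierr);     /* snapshot the current values */
/* ... overwrite values, factor in place, solve ... */
ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* restore the snapshot */
#endif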
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
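/*
   Editorial sketch: how the two colmap representations built above are
   consulted to translate a global column into an offset in the off-diagonal
   block B; this mirrors the lookups in MatSetValues_MPIBAIJ() below.
   (gcol and lcol are illustrative names, not variables from this file.)
*/
#if 0
#if defined (PETSC_USE_CTABLE)
ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
lcol = lcol - 1;                  /* entries are stored shifted by one */
#else
lcol = baij->colmap[gcol/bs] - 1; /* an empty slot yields -1, i.e. column absent */
#endif
#endif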
108 
109 #define CHUNKSIZE  10
110 
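/*
   The two macros below are hand-inlined variants of MatSetValues_SeqBAIJ()
   for the diagonal (A) and off-diagonal (B) blocks: locate the block row,
   narrow the search over the sorted block-column list, update the bs x bs
   block in place when it exists, and otherwise (if allowed by nonew)
   reallocate as needed and shift the later entries up to insert it.
*/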
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some Variables required in the macro */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   if (v) PetscValidScalarPointer(v,6);
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_DEBUG)
222     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
223 #endif
224     if (im[i] >= rstart_orig && im[i] < rend_orig) {
225       row = im[i] - rstart_orig;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart_orig && in[j] < cend_orig){
228           col = in[j] - cstart_orig;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_DEBUG)
234         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!baij->colmap) {
239               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
243             col  = col - 1;
244 #else
245             col = baij->colmap[in[j]/bs] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
248               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
251               B = baij->B;
252               b = (Mat_SeqBAIJ*)(B)->data;
253               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
254               ba=b->a;
255             } else col += in[j]%bs;
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
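/*
   Editorial sketch: exercising the routine above through the public
   MatSetValues() interface.  Sizes, block size, and the inserted entry are
   illustrative assumptions.
*/
#if 0
Mat            C;
PetscInt       row = 0,col = 0;
PetscScalar    one = 1.0;
PetscErrorCode ierr;

ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
ierr = MatMPIBAIJSetPreallocation(C,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr); /* point-wise (not block) indices */
ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           /* off-process entries leave the stash here */
ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatDestroy(C);CHKERRQ(ierr);
#endif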
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if (!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block in v is already contiguous, so a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + i*(stepval+bs)*bs + j*bs;
315           } else {
316             value = v + j*(stepval+bs)*bs + i*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=stepval) {
319             for (jj=0; jj<bs; jj++) {
320               *barray++  = *value++;
321             }
322           }
323           barray -=bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
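/*
   Editorial sketch: the blocked interface above takes BLOCK row/column
   indices and bs*bs values per block.  bs = 2 and the values below are
   illustrative assumptions; C and ierr come from an assumed calling
   context, with C a MATMPIBAIJ matrix of block size 2.
*/
#if 0
Mat         C;                            /* MATMPIBAIJ with bs = 2 (assumption) */
PetscInt    brow = 0,bcol = 0;            /* block indices, not point indices */
PetscScalar vals[4] = {1.0,2.0,3.0,4.0};  /* one 2x2 block, row-oriented layout */
ierr = MatSetValuesBlocked(C,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif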
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
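/*
   The active HASH() above is Knuth's multiplicative hash: scale the key by
   the golden-ratio conjugate and keep the fractional part.  Worked example:
   with size = 100 and key = 17, tmp = 17*0.6180339887 = 10.5066, the
   fractional part is 0.5066, so HASH() returns (PetscInt)(100*0.5066) = 50.
*/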
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscTruth     roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406     row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up into the hash table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscTruth        roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = sqrt(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
663       tmp2 = tmp + mat->cmap->N;
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree(tmp);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else {
722       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723     }
724   }
725   PetscFunctionReturn(0);
726 }
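/*
   Editorial note: the standard norm definitions realized above, each reduced
   across the communicator from the per-process A (diagonal) and B
   (off-diagonal) contributions:
     Frobenius:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (MPI_SUM of partial sums)
     1-norm:     ||A||_1   = max_j sum_i |a_ij|      (max column sum)
     inf-norm:   ||A||_oo  = max_i sum_j |a_ij|      (max row sum)
*/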
727 
728 /*
729   Creates and fills the hash table.
730   This table is created only once.
731   If new entries need to be added to the matrix
732   then the hash table has to be destroyed and
733   recreated.
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat            A = baij->A,B=baij->B;
741   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743   PetscErrorCode ierr;
744   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
745   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746   PetscInt       *HT,key;
747   MatScalar      **HD;
748   PetscReal      tmp;
749 #if defined(PETSC_USE_INFO)
750   PetscInt       ct=0,max=0;
751 #endif
752 
753   PetscFunctionBegin;
754   baij->ht_size=(PetscInt)(factor*nz);
755   size = baij->ht_size;
756 
757   if (baij->ht) {
758     PetscFunctionReturn(0);
759   }
760 
761   /* Allocate Memory for Hash Table */
762   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
763   baij->ht = (PetscInt*)(baij->hd + size);
764   HD       = baij->hd;
765   HT       = baij->ht;
766 
767 
768   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
769 
770 
771   /* Loop Over A */
772   for (i=0; i<a->mbs; i++) {
773     for (j=ai[i]; j<ai[i+1]; j++) {
774       row = i+rstart;
775       col = aj[j]+cstart;
776 
777       key = row*Nbs + col + 1;
778       h1  = HASH(size,key,tmp);
779       for (k=0; k<size; k++){
780         if (!HT[(h1+k)%size]) {
781           HT[(h1+k)%size] = key;
782           HD[(h1+k)%size] = a->a + j*bs2;
783           break;
784 #if defined(PETSC_USE_INFO)
785         } else {
786           ct++;
787 #endif
788         }
789       }
790 #if defined(PETSC_USE_INFO)
791       if (k> max) max = k;
792 #endif
793     }
794   }
795   /* Loop Over B */
796   for (i=0; i<b->mbs; i++) {
797     for (j=bi[i]; j<bi[i+1]; j++) {
798       row = i+rstart;
799       col = garray[bj[j]];
800       key = row*Nbs + col + 1;
801       h1  = HASH(size,key,tmp);
802       for (k=0; k<size; k++){
803         if (!HT[(h1+k)%size]) {
804           HT[(h1+k)%size] = key;
805           HD[(h1+k)%size] = b->a + j*bs2;
806           break;
807 #if defined(PETSC_USE_INFO)
808         } else {
809           ct++;
810 #endif
811         }
812       }
813 #if defined(PETSC_USE_INFO)
814       if (k> max) max = k;
815 #endif
816     }
817   }
818 
819   /* Print Summary */
820 #if defined(PETSC_USE_INFO)
821   for (i=0,j=0; i<size; i++) {
822     if (HT[i]) {j++;}
823   }
824   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
825 #endif
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
832 {
833   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
834   PetscErrorCode ierr;
835   PetscInt       nstash,reallocs;
836   InsertMode     addv;
837 
838   PetscFunctionBegin;
839   if (baij->donotstash) {
840     PetscFunctionReturn(0);
841   }
842 
843   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode: the MPI_BOR reduction yields (ADD_VALUES|INSERT_VALUES) exactly when some processes inserted and others added */
844   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
845   if (addv == (ADD_VALUES|INSERT_VALUES)) {
846     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
847   }
848   mat->insertmode = addv; /* in case this processor had no cache */
849 
850   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
851   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
852   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
853   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
854   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
855   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
862 {
863   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
864   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
865   PetscErrorCode ierr;
866   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
867   PetscInt       *row,*col;
868   PetscTruth     r1,r2,r3,other_disassembled;
869   MatScalar      *val;
870   InsertMode     addv = mat->insertmode;
871   PetscMPIInt    n;
872 
873   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
874   PetscFunctionBegin;
875   if (!baij->donotstash) {
876     while (1) {
877       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
878       if (!flg) break;
879 
880       for (i=0; i<n;) {
881         /* Now identify the consecutive vals belonging to the same row */
882         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
883         if (j < n) ncols = j-i;
884         else       ncols = n-i;
885         /* Now assemble all these values with a single function call */
886         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
887         i = j;
888       }
889     }
890     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
891     /* Now process the block-stash. Since the values are stashed column-oriented,
892        temporarily mark the matrices column-oriented for the MatSetValues() calls,
893        and restore the original flags afterwards */
894     r1 = baij->roworiented;
895     r2 = a->roworiented;
896     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
897     baij->roworiented = PETSC_FALSE;
898     a->roworiented    = PETSC_FALSE;
899     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
900     while (1) {
901       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
902       if (!flg) break;
903 
904       for (i=0; i<n;) {
905         /* Now identify the consecutive vals belonging to the same row */
906         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
907         if (j < n) ncols = j-i;
908         else       ncols = n-i;
909         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
910         i = j;
911       }
912     }
913     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
914     baij->roworiented = r1;
915     a->roworiented    = r2;
916     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
917   }
918 
919   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
920   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
921 
922   /* Determine if any processor has disassembled; if so we must
923      also disassemble ourselves, in order that we may reassemble. */
924   /*
925      If the nonzero structure of submatrix B cannot change then we know that
926      no processor disassembled, so we can skip this step.
927   */
928   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
929     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
930     if (mat->was_assembled && !other_disassembled) {
931       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
932     }
933   }
934 
935   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
936     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
937   }
938   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
939   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
940   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
941 
942 #if defined(PETSC_USE_INFO)
943   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
944     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
945     baij->ht_total_ct  = 0;
946     baij->ht_insert_ct = 0;
947   }
948 #endif
949   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
950     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
951     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
952     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
953   }
954 
955   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
956   baij->rowvalues = 0;
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
963 {
964   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
965   PetscErrorCode    ierr;
966   PetscMPIInt       size = baij->size,rank = baij->rank;
967   PetscInt          bs = mat->rmap->bs;
968   PetscTruth        iascii,isdraw;
969   PetscViewer       sviewer;
970   PetscViewerFormat format;
971 
972   PetscFunctionBegin;
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
974   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
975   if (iascii) {
976     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
977     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
978       MatInfo info;
979       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
980       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
981       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
982               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
983               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
984       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
985       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
986       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
987       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
988       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
989       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
990       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
991       PetscFunctionReturn(0);
992     } else if (format == PETSC_VIEWER_ASCII_INFO) {
993       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
996       PetscFunctionReturn(0);
997     }
998   }
999 
1000   if (isdraw) {
1001     PetscDraw       draw;
1002     PetscTruth isnull;
1003     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1004     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1005   }
1006 
1007   if (size == 1) {
1008     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1009     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1010   } else {
1011     /* assemble the entire matrix onto first processor. */
1012     Mat         A;
1013     Mat_SeqBAIJ *Aloc;
1014     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1015     MatScalar   *a;
1016 
1017     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1018     /* Perhaps this should be the type of mat? */
1019     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1020     if (!rank) {
1021       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1022     } else {
1023       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1024     }
1025     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1026     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1027     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1028 
1029     /* copy over the A part */
1030     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1031     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1032     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1033 
1034     for (i=0; i<mbs; i++) {
1035       rvals[0] = bs*(baij->rstartbs + i);
1036       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1037       for (j=ai[i]; j<ai[i+1]; j++) {
1038         col = (baij->cstartbs+aj[j])*bs;
1039         for (k=0; k<bs; k++) {
1040           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1041           col++; a += bs;
1042         }
1043       }
1044     }
1045     /* copy over the B part */
1046     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1047     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1048     for (i=0; i<mbs; i++) {
1049       rvals[0] = bs*(baij->rstartbs + i);
1050       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1051       for (j=ai[i]; j<ai[i+1]; j++) {
1052         col = baij->garray[aj[j]]*bs;
1053         for (k=0; k<bs; k++) {
1054           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1055           col++; a += bs;
1056         }
1057       }
1058     }
1059     ierr = PetscFree(rvals);CHKERRQ(ierr);
1060     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062     /*
1063        Everyone has to call to draw the matrix since the graphics waits are
1064        synchronized across all processors that share the PetscDraw object
1065     */
1066     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     if (!rank) {
1068       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1069       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1070     }
1071     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1072     ierr = MatDestroy(A);CHKERRQ(ierr);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatView_MPIBAIJ"
1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1080 {
1081   PetscErrorCode ierr;
1082   PetscTruth     iascii,isdraw,issocket,isbinary;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1089   if (iascii || isdraw || issocket || isbinary) {
1090     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1091   } else {
1092     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1100 {
1101   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1112 #if defined (PETSC_USE_CTABLE)
1113   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1114 #else
1115   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1118   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1119   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1120   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1124   ierr = PetscFree(baij);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatMult_MPIBAIJ"
1139 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1140 {
1141   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1142   PetscErrorCode ierr;
1143   PetscInt       nt;
1144 
1145   PetscFunctionBegin;
1146   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1147   if (nt != A->cmap->n) {
1148     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1149   }
1150   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1151   if (nt != A->rmap->n) {
1152     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1153   }
1154   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1156   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1163 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1171   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1172   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1178 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1181   PetscErrorCode ierr;
1182   PetscTruth     merged;
1183 
1184   PetscFunctionBegin;
1185   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   if (!merged) {
1189     /* send it on its way */
1190     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191     /* do local part */
1192     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1193     /* receive remote parts: note this assumes the values are not actually */
1194     /* inserted in yy until the next line */
1195     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1196   } else {
1197     /* do local part */
1198     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1199     /* send it on its way */
1200     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1201     /* values actually were received in the Begin() but we need to call this nop */
1202     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   /* do nondiagonal part */
1216   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1217   /* send it on its way */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   /* do local part */
1220   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1221   /* receive remote parts: note this assumes the values are not actually */
1222   /* inserted in zz until the next line, which is true for this implementation */
1223   /* but may not always hold. */
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatScale_MPIBAIJ"
1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1248 {
1249   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1254   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1267   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1278     PetscInt     max = 1,mbs = mat->mbs,tmp;
1279     for (i=0; i<mbs; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1285   }
1286 
1287   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1288   lrow = row - brstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]/bs] < cstart)
1321               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
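/*
   Editorial sketch: public-interface use of the row access implemented
   above; only locally owned rows may be requested.  The loop bounds come
   from MatGetOwnershipRange() (an assumed calling context providing A and
   ierr).
*/
#if 0
PetscInt          row,rstart,rend,ncols;
const PetscInt    *cols;
const PetscScalar *vals;
ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
for (row=rstart; row<rend; row++) {
  ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... inspect the ncols (global column, value) pairs ... */
  ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
}
#endif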
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!baij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1349   }
1350   baij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1357 {
1358   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1363   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1372   Mat            A = a->A,B = a->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = (PetscReal)matin->rmap->bs;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else {
1405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1406   }
1407   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1408   info->fill_ratio_needed = 0;
1409   info->factor_mallocs    = 0;
1410   PetscFunctionReturn(0);
1411 }
1412 
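/*
   A minimal usage sketch (not compiled into the library; ExampleReportInfo is
   a hypothetical name): querying aggregate storage statistics through the
   public MatGetInfo(), which dispatches to MatGetInfo_MPIBAIJ() above.
   MAT_LOCAL reports this process only; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX
   reduce the five statistics over the matrix communicator.
*/
#if 0
static PetscErrorCode ExampleReportInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %G, nz allocated %G, mallocs %G\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
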
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1415 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   switch (op) {
1422   case MAT_NEW_NONZERO_LOCATIONS:
1423   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1424   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1425   case MAT_KEEP_NONZERO_PATTERN:
1426   case MAT_NEW_NONZERO_LOCATION_ERR:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = flg;
1440     break;
1441   case MAT_USE_HASH_TABLE:
1442     a->ht_flag = flg;
1443     break;
1444   case MAT_SYMMETRIC:
1445   case MAT_STRUCTURALLY_SYMMETRIC:
1446   case MAT_HERMITIAN:
1447   case MAT_SYMMETRY_ETERNAL:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   default:
1451     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
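/*
   A minimal usage sketch (not compiled into the library; ExampleSetOptions is
   a hypothetical name) of two options handled above: MAT_IGNORE_OFF_PROC_ENTRIES
   drops values destined for other processes instead of stashing them, and
   MAT_USE_HASH_TABLE enables the hash-table lookup for value insertion. Both
   must be set before values are inserted.
*/
#if 0
static PetscErrorCode ExampleSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
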
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1458 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1459 {
1460   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1461   Mat_SeqBAIJ    *Aloc;
1462   Mat            B;
1463   PetscErrorCode ierr;
1464   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1465   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1466   MatScalar      *a;
1467 
1468   PetscFunctionBegin;
1469   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1470   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1471     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1472     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1473     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1474     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1475   } else {
1476     B = *matout;
1477   }
1478 
1479   /* copy over the A part */
1480   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1481   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1482   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1483 
1484   for (i=0; i<mbs; i++) {
1485     rvals[0] = bs*(baij->rstartbs + i);
1486     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1487     for (j=ai[i]; j<ai[i+1]; j++) {
1488       col = (baij->cstartbs+aj[j])*bs;
1489       for (k=0; k<bs; k++) {
1490         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1491         col++; a += bs;
1492       }
1493     }
1494   }
1495   /* copy over the B part */
1496   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1497   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1498   for (i=0; i<mbs; i++) {
1499     rvals[0] = bs*(baij->rstartbs + i);
1500     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1501     for (j=ai[i]; j<ai[i+1]; j++) {
1502       col = baij->garray[aj[j]]*bs;
1503       for (k=0; k<bs; k++) {
1504         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1505         col++; a += bs;
1506       }
1507     }
1508   }
1509   ierr = PetscFree(rvals);CHKERRQ(ierr);
1510   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512 
1513   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1514     *matout = B;
1515   } else {
1516     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 
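/*
   A minimal usage sketch (not compiled into the library; ExampleTranspose is
   a hypothetical name): out-of-place and in-place transposition through the
   public MatTranspose(). As checked above, the in-place form is permitted
   only for square matrices.
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* creates a new matrix */
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in-place, square only */
  PetscFunctionReturn(0);
}
#endif
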
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1523 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1524 {
1525   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1526   Mat            a = baij->A,b = baij->B;
1527   PetscErrorCode ierr;
1528   PetscInt       s1,s2,s3;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1532   if (rr) {
1533     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1534     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1535     /* Overlap communication with computation. */
1536     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537   }
1538   if (ll) {
1539     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1540     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1541     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1542   }
1543   /* scale  the diagonal block */
1544   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1545 
1546   if (rr) {
1547     /* Do a scatter end and then right scale the off-diagonal block */
1548     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1550   }
1551 
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
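/*
   A minimal usage sketch (not compiled into the library; ExampleDiagonalScale
   is a hypothetical name): forming diag(l)*A*diag(r) via the public
   MatDiagonalScale(). The left vector must conform to the row layout and the
   right vector to the column layout; MatGetVecs() produces both.
*/
#if 0
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);     /* r: column layout, l: row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* A <- diag(l) A diag(r) */
  ierr = VecDestroy(l);CHKERRQ(ierr);
  ierr = VecDestroy(r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
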
1556 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1557 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1558 {
1559   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1560   PetscErrorCode ierr;
1561   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1562   PetscInt       i,*owners = A->rmap->range;
1563   PetscInt       *nprocs,j,idx,nsends,row;
1564   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1565   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1566   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1567   MPI_Comm       comm = ((PetscObject)A)->comm;
1568   MPI_Request    *send_waits,*recv_waits;
1569   MPI_Status     recv_status,*send_status;
1570 #if defined(PETSC_DEBUG)
1571   PetscTruth     found = PETSC_FALSE;
1572 #endif
1573 
1574   PetscFunctionBegin;
1575   /*  first count number of contributors to each processor */
1576   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1577   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1578   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1579   j = 0;
1580   for (i=0; i<N; i++) {
1581     if (lastidx > (idx = rows[i])) j = 0;
1582     lastidx = idx;
1583     for (; j<size; j++) {
1584       if (idx >= owners[j] && idx < owners[j+1]) {
1585         nprocs[2*j]++;
1586         nprocs[2*j+1] = 1;
1587         owner[i] = j;
1588 #if defined(PETSC_DEBUG)
1589         found = PETSC_TRUE;
1590 #endif
1591         break;
1592       }
1593     }
1594 #if defined(PETSC_DEBUG)
1595     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1596     found = PETSC_FALSE;
1597 #endif
1598   }
1599   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1600 
1601   /* inform other processors of number of messages and max length*/
1602   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1603 
1604   /* post receives:   */
1605   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1606   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1607   for (i=0; i<nrecvs; i++) {
1608     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1609   }
1610 
1611   /* do sends:
1612      1) starts[i] gives the starting index in svalues for stuff going to
1613      the ith processor
1614   */
1615   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1616   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1617   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1618   starts[0]  = 0;
1619   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1620   for (i=0; i<N; i++) {
1621     svalues[starts[owner[i]]++] = rows[i];
1622   }
1623 
1624   starts[0] = 0;
1625   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1626   count = 0;
1627   for (i=0; i<size; i++) {
1628     if (nprocs[2*i+1]) {
1629       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1630     }
1631   }
1632   ierr = PetscFree(starts);CHKERRQ(ierr);
1633 
1634   base = owners[rank];
1635 
1636   /*  wait on receives */
1637   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1638   source = lens + nrecvs;
1639   count  = nrecvs; slen = 0;
1640   while (count) {
1641     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1642     /* unpack receives into our local space */
1643     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1644     source[imdex]  = recv_status.MPI_SOURCE;
1645     lens[imdex]    = n;
1646     slen          += n;
1647     count--;
1648   }
1649   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1650 
1651   /* move the data into the send scatter */
1652   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1653   count = 0;
1654   for (i=0; i<nrecvs; i++) {
1655     values = rvalues + i*nmax;
1656     for (j=0; j<lens[i]; j++) {
1657       lrows[count++] = values[j] - base;
1658     }
1659   }
1660   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1661   ierr = PetscFree(lens);CHKERRQ(ierr);
1662   ierr = PetscFree(owner);CHKERRQ(ierr);
1663   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1664 
1665   /* actually zap the local rows */
1666   /*
1667         Zero the required rows. If the "diagonal block" of the matrix
1668      is square and the user wishes to set the diagonal, we use separate
1669      code so that MatSetValues() is not called for each diagonal entry;
1670      that would allocate new memory on every call, causing many mallocs
1671      and slowing things down.
1672   */
1673   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1674   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1675   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1676     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1677   } else if (diag != 0.0) {
1678     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1679     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1680       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1682     }
1683     for (i=0; i<slen; i++) {
1684       row  = lrows[i] + rstart_bs;
1685       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1686     }
1687     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   } else {
1690     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1691   }
1692 
1693   ierr = PetscFree(lrows);CHKERRQ(ierr);
1694 
1695   /* wait on sends */
1696   if (nsends) {
1697     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1698     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1699     ierr = PetscFree(send_status);CHKERRQ(ierr);
1700   }
1701   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1702   ierr = PetscFree(svalues);CHKERRQ(ierr);
1703 
1704   PetscFunctionReturn(0);
1705 }
1706 
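/*
   A minimal usage sketch (not compiled into the library; ExampleZeroRows is a
   hypothetical name): zeroing rows through the public MatZeroRows(), e.g. to
   impose Dirichlet boundary conditions. Each process may list any global
   rows; the routine above communicates them to their owners. A nonzero diag
   puts that value on the diagonal of each zeroed row.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rows[2];

  PetscFunctionBegin;
  rows[0] = 0; rows[1] = 1;                       /* global row indices */
  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr); /* rows become unit rows */
  PetscFunctionReturn(0);
}
#endif
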
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1710 {
1711   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatEqual_MPIBAIJ"
1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1724 {
1725   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1726   Mat            a,b,c,d;
1727   PetscTruth     flg;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   a = matA->A; b = matA->B;
1732   c = matB->A; d = matB->B;
1733 
1734   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1735   if (flg) {
1736     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1737   }
1738   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCopy_MPIBAIJ"
1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1745 {
1746   PetscErrorCode ierr;
1747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1748   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1749 
1750   PetscFunctionBegin;
1751   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1756     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #include "petscblaslapack.h"
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqBAIJ    *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqBAIJ *)xx->A->data;
1786     y = (Mat_SeqBAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqBAIJ *)xx->B->data;
1790     y = (Mat_SeqBAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else {
1794     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1795   }
1796   PetscFunctionReturn(0);
1797 }
1798 
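/*
   A minimal usage sketch (not compiled into the library; ExampleAXPY is a
   hypothetical name): Y <- a*X + Y via the public MatAXPY().
   SAME_NONZERO_PATTERN takes the fast BLAS path above; other structure flags
   fall back to MatAXPY_Basic().
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- 2*X + Y */
  PetscFunctionReturn(0);
}
#endif
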
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1801 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1802 {
1803   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1808   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1814 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1815 {
1816   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1821   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1827 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1828 {
1829   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1830   PetscErrorCode ierr;
1831 
1832   PetscFunctionBegin;
1833   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1834   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1840 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1841 {
1842   PetscErrorCode ierr;
1843   IS             iscol_local;
1844   PetscInt       csize;
1845 
1846   PetscFunctionBegin;
1847   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1848   if (call == MAT_REUSE_MATRIX) {
1849     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1850     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1851   } else {
1852     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1853   }
1854   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1855   if (call == MAT_INITIAL_MATRIX) {
1856     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1857     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1858   }
1859   PetscFunctionReturn(0);
1860 }
1861 
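/*
   A minimal usage sketch (not compiled into the library; ExampleSubMatrix is
   a hypothetical name): extracting a parallel submatrix through the public
   MatGetSubMatrix(). For BAIJ the index sets must be block-aligned. A
   MAT_REUSE_MATRIX call must pass the matrix obtained from the
   MAT_INITIAL_MATRIX call, since the gathered column IS is cached on it
   under "ISAllGather" (see above).
*/
#if 0
static PetscErrorCode ExampleSubMatrix(Mat A,IS isrow,IS iscol)
{
  PetscErrorCode ierr;
  Mat            S;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  /* ... use S; refresh its values later without reallocating: ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatDestroy(S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
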
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1864 /*
1865     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1866   on each process, and then the final result formed by concatenating those
1867   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1868 */
1869 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1870 {
1871   PetscErrorCode ierr;
1872   PetscMPIInt    rank,size;
1873   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1874   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1875   Mat            *local,M,Mreuse;
1876   MatScalar      *vwork,*aa;
1877   MPI_Comm       comm = ((PetscObject)mat)->comm;
1878   Mat_SeqBAIJ    *aij;
1879 
1880 
1881   PetscFunctionBegin;
1882   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1884 
1885   if (call ==  MAT_REUSE_MATRIX) {
1886     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1887     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1888     local = &Mreuse;
1889     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1890   } else {
1891     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1892     Mreuse = *local;
1893     ierr   = PetscFree(local);CHKERRQ(ierr);
1894   }
1895 
1896   /*
1897       m - number of local rows
1898       n - number of columns (same on all processors)
1899       rstart - first row in new global matrix generated
1900   */
1901   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1902   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1903   m    = m/bs;
1904   n    = n/bs;
1905 
1906   if (call == MAT_INITIAL_MATRIX) {
1907     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1908     ii  = aij->i;
1909     jj  = aij->j;
1910 
1911     /*
1912         Determine the number of non-zeros in the diagonal and off-diagonal
1913         portions of the matrix in order to do correct preallocation
1914     */
1915 
1916     /* first get start and end of "diagonal" columns */
1917     if (csize == PETSC_DECIDE) {
1918       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1919       if (mglobal == n*bs) { /* square matrix */
1920         nlocal = m;
1921       } else {
1922         nlocal = n/size + ((n % size) > rank);
1923       }
1924     } else {
1925       nlocal = csize/bs;
1926     }
1927     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1928     rstart = rend - nlocal;
1929     if (rank == size - 1 && rend != n) {
1930       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1931     }
1932 
1933     /* next, compute all the lengths */
1934     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1935     olens = dlens + m;
1936     for (i=0; i<m; i++) {
1937       jend = ii[i+1] - ii[i];
1938       olen = 0;
1939       dlen = 0;
1940       for (j=0; j<jend; j++) {
1941         if (*jj < rstart || *jj >= rend) olen++;
1942         else dlen++;
1943         jj++;
1944       }
1945       olens[i] = olen;
1946       dlens[i] = dlen;
1947     }
1948     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1949     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1950     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1951     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1952     ierr = PetscFree(dlens);CHKERRQ(ierr);
1953   } else {
1954     PetscInt ml,nl;
1955 
1956     M = *newmat;
1957     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1958     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1959     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1960     /*
1961          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1962        rather than the slower MatSetValues().
1963     */
1964     M->was_assembled = PETSC_TRUE;
1965     M->assembled     = PETSC_FALSE;
1966   }
1967   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1968   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1969   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1970   ii  = aij->i;
1971   jj  = aij->j;
1972   aa  = aij->a;
1973   for (i=0; i<m; i++) {
1974     row   = rstart/bs + i;
1975     nz    = ii[i+1] - ii[i];
1976     cwork = jj;     jj += nz;
1977     vwork = aa;     aa += nz;
1978     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1979   }
1980 
1981   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   *newmat = M;
1984 
1985   /* save submatrix used in processor for next request */
1986   if (call ==  MAT_INITIAL_MATRIX) {
1987     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1988     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1989   }
1990 
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatPermute_MPIBAIJ"
1996 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1997 {
1998   MPI_Comm       comm,pcomm;
1999   PetscInt       first,local_size,nrows;
2000   const PetscInt *rows;
2001   PetscMPIInt    size;
2002   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2007   /* make a collective version of 'rowp' */
2008   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2009   if (pcomm==comm) {
2010     crowp = rowp;
2011   } else {
2012     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2013     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2014     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2015     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2016   }
2017   /* collect the global row permutation and invert it */
2018   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2019   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2020   if (pcomm!=comm) {
2021     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2022   }
2023   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2024   /* get the local target indices */
2025   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2026   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2027   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2028   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2029   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2030   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2031   /* the column permutation is so much easier;
2032      make a local version of 'colp' and invert it */
2033   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2034   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2035   if (size==1) {
2036     lcolp = colp;
2037   } else {
2038     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2039     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2040     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2041   }
2042   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2043   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2044   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2045   if (size>1) {
2046     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2047     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2048   }
2049   /* now we just get the submatrix */
2050   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2051   /* clean up */
2052   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2053   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2059 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2060 {
2061   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2062   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2063 
2064   PetscFunctionBegin;
2065   if (nghosts) { *nghosts = B->nbs;}
2066   if (ghosts) {*ghosts = baij->garray;}
2067   PetscFunctionReturn(0);
2068 }
2069 
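/*
   A minimal usage sketch (not compiled into the library; ExampleGhosts is a
   hypothetical name): the ghosts returned above are the global block-column
   indices of the off-diagonal part (baij->garray), one entry per block, not
   per scalar column.
*/
#if 0
static PetscErrorCode ExampleGhosts(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       nghosts;
  const PetscInt *ghosts;

  PetscFunctionBegin;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"%D ghost blocks\n",nghosts);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
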
2070 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2074 /*
2075     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2076 */
2077 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2078 {
2079   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2080   PetscErrorCode        ierr;
2081   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2082   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2083   const PetscInt        *is;
2084   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2085   PetscInt              *rowhit,M,cstart,cend,colb;
2086   PetscInt              *columnsforrow,l;
2087   IS                    *isa;
2088   PetscTruth             done,flg;
2089   ISLocalToGlobalMapping map = mat->bmapping;
2090   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2091 
2092   PetscFunctionBegin;
2093   if (!mat->assembled) {
2094     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2095   }
2096   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2097 
2098   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2099 
2100   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2101   M                = mat->rmap->n/bs;
2102   cstart           = mat->cmap->rstart/bs;
2103   cend             = mat->cmap->rend/bs;
2104   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2105   c->N             = mat->cmap->N/bs;
2106   c->m             = mat->rmap->n/bs;
2107   c->rstart        = mat->rmap->rstart/bs;
2108 
2109   c->ncolors       = nis;
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2111   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2112   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2113   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2114   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2115   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2116 
2117   /* Allow access to data structures of local part of matrix */
2118   if (!baij->colmap) {
2119     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2120   }
2121   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2122   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2123 
2124   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2125   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2126 
2127   for (i=0; i<nis; i++) {
2128     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2129     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2130     c->ncolumns[i] = n;
2131     if (n) {
2132       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2133       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2134       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2135     } else {
2136       c->columns[i]  = 0;
2137     }
2138 
2139     if (ctype == IS_COLORING_GLOBAL){
2140       /* Determine the total (parallel) number of columns of this color */
2141       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2142       ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&ncolsonproc);CHKERRQ(ierr);
2143       disp = ncolsonproc + size;
2144 
2145       nn   = PetscMPIIntCast(n);
2146       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2147       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2148       if (!nctot) {
2149         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2150       }
2151 
2152       disp[0] = 0;
2153       for (j=1; j<size; j++) {
2154         disp[j] = disp[j-1] + ncolsonproc[j-1];
2155       }
2156 
2157       /* Get complete list of columns for color on each processor */
2158       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2159       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2160       ierr = PetscFree(ncolsonproc);CHKERRQ(ierr);
2161     } else if (ctype == IS_COLORING_GHOSTED){
2162       /* Determine local number of columns of this color on this process, including ghost points */
2163       nctot = n;
2164       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2165       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2166     } else {
2167       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2168     }
2169 
2170     /*
2171        Mark all rows affected by these columns
2172     */
2173     /* Temporary option to allow for debugging/testing */
2174     flg  = PETSC_FALSE;
2175     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2176     if (!flg) {/*-----------------------------------------------------------------------------*/
2177       /* crude, fast version */
2178       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2179       /* loop over columns*/
2180       for (j=0; j<nctot; j++) {
2181         if (ctype == IS_COLORING_GHOSTED) {
2182           col = ltog[cols[j]];
2183         } else {
2184           col  = cols[j];
2185         }
2186         if (col >= cstart && col < cend) {
2187           /* column is in diagonal block of matrix */
2188           rows = A_cj + A_ci[col-cstart];
2189           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2190         } else {
2191 #if defined (PETSC_USE_CTABLE)
2192           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2193           colb--;
2194 #else
2195           colb = baij->colmap[col] - 1;
2196 #endif
2197           if (colb == -1) {
2198             m = 0;
2199           } else {
2200             colb = colb/bs;
2201             rows = B_cj + B_ci[colb];
2202             m    = B_ci[colb+1] - B_ci[colb];
2203           }
2204         }
2205         /* loop over columns marking them in rowhit */
2206         for (k=0; k<m; k++) {
2207           rowhit[*rows++] = col + 1;
2208         }
2209       }
2210 
2211       /* count the number of hits */
2212       nrows = 0;
2213       for (j=0; j<M; j++) {
2214         if (rowhit[j]) nrows++;
2215       }
2216       c->nrows[i]         = nrows;
2217       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2218       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2219       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2220       nrows = 0;
2221       for (j=0; j<M; j++) {
2222         if (rowhit[j]) {
2223           c->rows[i][nrows]           = j;
2224           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2225           nrows++;
2226         }
2227       }
2228     } else {/*-------------------------------------------------------------------------------*/
2229       /* slow version, using rowhit as a linked list */
2230       PetscInt currentcol,fm,mfm;
2231       rowhit[M] = M;
2232       nrows     = 0;
2233       /* loop over columns*/
2234       for (j=0; j<nctot; j++) {
2235         if (ctype == IS_COLORING_GHOSTED) {
2236           col = ltog[cols[j]];
2237         } else {
2238           col  = cols[j];
2239         }
2240         if (col >= cstart && col < cend) {
2241           /* column is in diagonal block of matrix */
2242           rows = A_cj + A_ci[col-cstart];
2243           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2244         } else {
2245 #if defined (PETSC_USE_CTABLE)
2246           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2247           colb--;
2248 #else
2249           colb = baij->colmap[col] - 1;
2250 #endif
2251           if (colb == -1) {
2252             m = 0;
2253           } else {
2254             colb = colb/bs;
2255             rows = B_cj + B_ci[colb];
2256             m    = B_ci[colb+1] - B_ci[colb];
2257           }
2258         }
2259 
2260         /* loop over columns marking them in rowhit */
2261         fm    = M; /* fm points to first entry in linked list */
2262         for (k=0; k<m; k++) {
2263           currentcol = *rows++;
2264           /* is it already in the list? */
2265           do {
2266             mfm  = fm;
2267             fm   = rowhit[fm];
2268           } while (fm < currentcol);
2269           /* not in list so add it */
2270           if (fm != currentcol) {
2271             nrows++;
2272             columnsforrow[currentcol] = col;
2273             /* next three lines insert new entry into linked list */
2274             rowhit[mfm]               = currentcol;
2275             rowhit[currentcol]        = fm;
2276             fm                        = currentcol;
2277             /* fm points to present position in list since we know the columns are sorted */
2278           } else {
2279             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2280           }
2281         }
2282       }
2283       c->nrows[i]         = nrows;
2284       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2285       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2286       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2287       /* now store the linked list of rows into c->rows[i] */
2288       nrows = 0;
2289       fm    = rowhit[M];
2290       do {
2291         c->rows[i][nrows]            = fm;
2292         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2293         fm                           = rowhit[fm];
2294       } while (fm < M);
2295     } /* ---------------------------------------------------------------------------------------*/
2296     ierr = PetscFree(cols);CHKERRQ(ierr);
2297   }
2298 
2299   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2300   /*
2301        vscale will contain the on-process ("diagonal") scalings followed by the off-process ones
2302   */
2303   if (ctype == IS_COLORING_GLOBAL) {
2304     PetscInt *garray;
2305     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2306     for (i=0; i<baij->B->cmap->n/bs; i++) {
2307       for (j=0; j<bs; j++) {
2308         garray[i*bs+j] = bs*baij->garray[i]+j;
2309       }
2310     }
2311     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2312     ierr = PetscFree(garray);CHKERRQ(ierr);
2313     CHKMEMQ;
2314     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2315     for (k=0; k<c->ncolors; k++) {
2316       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2317       for (l=0; l<c->nrows[k]; l++) {
2318         col = c->columnsforrow[k][l];
2319         if (col >= cstart && col < cend) {
2320           /* column is in diagonal block of matrix */
2321           colb = col - cstart;
2322         } else {
2323           /* column  is in "off-processor" part */
2324 #if defined (PETSC_USE_CTABLE)
2325           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2326           colb--;
2327 #else
2328           colb = baij->colmap[col] - 1;
2329 #endif
2330           colb = colb/bs;
2331           colb += cend - cstart;
2332         }
2333         c->vscaleforrow[k][l] = colb;
2334       }
2335     }
2336   } else if (ctype == IS_COLORING_GHOSTED) {
2337     /* Get gtol mapping */
2338     PetscInt N = mat->cmap->N, *gtol;
2339     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2340     for (i=0; i<N; i++) gtol[i] = -1;
2341     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2342 
2343     c->vscale = 0; /* will be created in MatFDColoringApply() */
2344     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2345     for (k=0; k<c->ncolors; k++) {
2346       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2347       for (l=0; l<c->nrows[k]; l++) {
2348         col = c->columnsforrow[k][l];      /* global column index */
2349         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2350       }
2351     }
2352     ierr = PetscFree(gtol);CHKERRQ(ierr);
2353   }
2354   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2355 
2356   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2357   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2358   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2359   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2360   CHKMEMQ;
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2366 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2367 {
2368   Mat            B;
2369   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2370   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2371   Mat_SeqAIJ     *b;
2372   PetscErrorCode ierr;
2373   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2374   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2375   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2376 
2377   PetscFunctionBegin;
2378   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2379   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2380 
2381   /* ----------------------------------------------------------------
2382      Tell every processor the number of nonzeros per row
2383   */
2384   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2385   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2386     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2387   }
2388   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2389   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2390   displs     = recvcounts + size;
2391   for (i=0; i<size; i++) {
2392     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2393     displs[i]     = A->rmap->range[i]/bs;
2394   }
2395 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2396   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2397 #else
2398   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2399 #endif
2400   /* ---------------------------------------------------------------
2401      Create the sequential matrix of the same type as the local block diagonal
2402   */
2403   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2404   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2405   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2406   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2407   b = (Mat_SeqAIJ *)B->data;
2408 
2409   /*--------------------------------------------------------------------
2410     Copy my part of matrix column indices over
2411   */
2412   sendcount  = ad->nz + bd->nz;
2413   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2414   a_jsendbuf = ad->j;
2415   b_jsendbuf = bd->j;
2416   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2417   cnt        = 0;
2418   for (i=0; i<n; i++) {
2419 
2420     /* put in lower diagonal portion */
2421     m = bd->i[i+1] - bd->i[i];
2422     while (m > 0) {
2423       /* is it above diagonal (in bd (compressed) numbering) */
2424       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2425       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2426       m--;
2427     }
2428 
2429     /* put in diagonal portion */
2430     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2431       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2432     }
2433 
2434     /* put in upper diagonal portion */
2435     while (m-- > 0) {
2436       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2437     }
2438   }
2439   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2440 
2441   /*--------------------------------------------------------------------
2442     Gather all column indices to all processors
2443   */
2444   for (i=0; i<size; i++) {
2445     recvcounts[i] = 0;
2446     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2447       recvcounts[i] += lens[j];
2448     }
2449   }
2450   displs[0]  = 0;
2451   for (i=1; i<size; i++) {
2452     displs[i] = displs[i-1] + recvcounts[i-1];
2453   }
2454 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2455   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2456 #else
2457   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2458 #endif
2459   /*--------------------------------------------------------------------
2460     Assemble the matrix into usable form (note numerical values not yet set)
2461   */
2462   /* set the b->ilen (length of each row) values */
2463   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2464   /* set the b->i indices */
2465   b->i[0] = 0;
2466   for (i=1; i<=A->rmap->N/bs; i++) {
2467     b->i[i] = b->i[i-1] + lens[i-1];
2468   }
2469   ierr = PetscFree(lens);CHKERRQ(ierr);
2470   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2471   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2473 
2474   if (A->symmetric){
2475     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2476   } else if (A->hermitian) {
2477     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2478   } else if (A->structurally_symmetric) {
2479     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2480   }
2481   *newmat = B;
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatSOR_MPIBAIJ"
2487 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2488 {
2489   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2490   PetscErrorCode ierr;
2491   Vec            bb1 = 0;
2492 
2493   PetscFunctionBegin;
2494   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2495     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2496   }
2497 
2498   if (flag == SOR_APPLY_UPPER) {
2499     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2500     PetscFunctionReturn(0);
2501   }
2502 
2503   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2504     if (flag & SOR_ZERO_INITIAL_GUESS) {
2505       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2506       its--;
2507     }
2508 
2509     while (its--) {
2510       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512 
2513       /* update rhs: bb1 = bb - B*x */
2514       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2515       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2516 
2517       /* local sweep */
2518       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2519     }
2520   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2521     if (flag & SOR_ZERO_INITIAL_GUESS) {
2522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525     while (its--) {
2526       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2527       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2528 
2529       /* update rhs: bb1 = bb - B*x */
2530       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2531       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2532 
2533       /* local sweep */
2534       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2535     }
2536   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2537     if (flag & SOR_ZERO_INITIAL_GUESS) {
2538       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2539       its--;
2540     }
2541     while (its--) {
2542       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2543       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2544 
2545       /* update rhs: bb1 = bb - B*x */
2546       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2547       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2548 
2549       /* local sweep */
2550       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2551     }
2552   } else {
2553     SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2554   }
2555 
2556   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2557   PetscFunctionReturn(0);
2558 }
2559 
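/*
   A minimal usage sketch (not compiled into the library; ExampleLocalSOR is a
   hypothetical name): one step of processor-local symmetric SOR through the
   public MatSOR(). The sweep type and SOR_ZERO_INITIAL_GUESS are OR'ed into
   a single MatSORType flag, which is how the branches above test them.
*/
#if 0
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                0.0,1,1,x);CHKERRQ(ierr); /* omega=1, fshift=0, its=lits=1 */
  PetscFunctionReturn(0);
}
#endif
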
2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2561 
2562 
2563 /* -------------------------------------------------------------------*/
2564 static struct _MatOps MatOps_Values = {
2565        MatSetValues_MPIBAIJ,
2566        MatGetRow_MPIBAIJ,
2567        MatRestoreRow_MPIBAIJ,
2568        MatMult_MPIBAIJ,
2569 /* 4*/ MatMultAdd_MPIBAIJ,
2570        MatMultTranspose_MPIBAIJ,
2571        MatMultTransposeAdd_MPIBAIJ,
2572        0,
2573        0,
2574        0,
2575 /*10*/ 0,
2576        0,
2577        0,
2578        MatSOR_MPIBAIJ,
2579        MatTranspose_MPIBAIJ,
2580 /*15*/ MatGetInfo_MPIBAIJ,
2581        MatEqual_MPIBAIJ,
2582        MatGetDiagonal_MPIBAIJ,
2583        MatDiagonalScale_MPIBAIJ,
2584        MatNorm_MPIBAIJ,
2585 /*20*/ MatAssemblyBegin_MPIBAIJ,
2586        MatAssemblyEnd_MPIBAIJ,
2587        MatSetOption_MPIBAIJ,
2588        MatZeroEntries_MPIBAIJ,
2589 /*24*/ MatZeroRows_MPIBAIJ,
2590        0,
2591        0,
2592        0,
2593        0,
2594 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2595        0,
2596        0,
2597        0,
2598        0,
2599 /*34*/ MatDuplicate_MPIBAIJ,
2600        0,
2601        0,
2602        0,
2603        0,
2604 /*39*/ MatAXPY_MPIBAIJ,
2605        MatGetSubMatrices_MPIBAIJ,
2606        MatIncreaseOverlap_MPIBAIJ,
2607        MatGetValues_MPIBAIJ,
2608        MatCopy_MPIBAIJ,
2609 /*44*/ 0,
2610        MatScale_MPIBAIJ,
2611        0,
2612        0,
2613        0,
2614 /*49*/ MatSetBlockSize_MPIBAIJ,
2615        0,
2616        0,
2617        0,
2618        0,
2619 /*54*/ MatFDColoringCreate_MPIBAIJ,
2620        0,
2621        MatSetUnfactored_MPIBAIJ,
2622        MatPermute_MPIBAIJ,
2623        MatSetValuesBlocked_MPIBAIJ,
2624 /*59*/ MatGetSubMatrix_MPIBAIJ,
2625        MatDestroy_MPIBAIJ,
2626        MatView_MPIBAIJ,
2627        0,
2628        0,
2629 /*64*/ 0,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*74*/ 0,
2640        MatFDColoringApply_BAIJ,
2641        0,
2642        0,
2643        0,
2644 /*79*/ 0,
2645        0,
2646        0,
2647        0,
2648        MatLoad_MPIBAIJ,
2649 /*84*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*89*/ 0,
2655        0,
2656        0,
2657        0,
2658        0,
2659 /*94*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*99*/ 0,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*104*/0,
2670        MatRealPart_MPIBAIJ,
2671        MatImaginaryPart_MPIBAIJ,
2672        0,
2673        0,
2674 /*109*/0,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2680        0,
2681        MatGetGhosts_MPIBAIJ
2682 };
2683 
2684 EXTERN_C_BEGIN
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2688 {
2689   PetscFunctionBegin;
2690   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2691   *iscopy = PETSC_FALSE;
2692   PetscFunctionReturn(0);
2693 }
2694 EXTERN_C_END
2695 
2696 EXTERN_C_BEGIN
2697 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2698 EXTERN_C_END
2699 
2700 EXTERN_C_BEGIN
2701 #undef __FUNCT__
2702 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2703 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2704 {
2705   PetscInt       m,rstart,cstart,cend;
2706   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2707   const PetscInt *JJ=0;
2708   PetscScalar    *values=0;
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712 
2713   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2714   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2715   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2716   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2717   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2718   m      = B->rmap->n/bs;
2719   rstart = B->rmap->rstart/bs;
2720   cstart = B->cmap->rstart/bs;
2721   cend   = B->cmap->rend/bs;
2722 
2723   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2724   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2725   o_nnz = d_nnz + m;
2726   for (i=0; i<m; i++) {
2727     nz = ii[i+1] - ii[i];
2728     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2729     nz_max = PetscMax(nz_max,nz);
2730     JJ  = jj + ii[i];
2731     for (j=0; j<nz; j++) {
2732       if (*JJ >= cstart) break;
2733       JJ++;
2734     }
2735     d = 0;
2736     for (; j<nz; j++) {
2737       if (*JJ++ >= cend) break;
2738       d++;
2739     }
2740     d_nnz[i] = d;
2741     o_nnz[i] = nz - d;
2742   }
2743   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2744   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2745 
2746   values = (PetscScalar*)V;
2747   if (!values) {
2748     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2749     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2750   }
2751   for (i=0; i<m; i++) {
2752     PetscInt          row    = i + rstart;
2753     PetscInt          ncols  = ii[i+1] - ii[i];
2754     const PetscInt    *icols = jj + ii[i];
2755     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2756     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2757   }
2758 
2759   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2760   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762 
2763   PetscFunctionReturn(0);
2764 }
2765 EXTERN_C_END
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2769 /*@C
2770    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ (block compressed row) format.
2771 
2772    Collective on MPI_Comm
2773 
2774    Input Parameters:
2775 +  B - the matrix
2776 .  bs - the block size
2777 .  i - the indices into j for the start of each local block row (starts with zero)
2778 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2779 -  v - optional values in the matrix
2780 
2781    Level: developer
2782 
2783 .keywords: matrix, baij, block compressed row, sparse, parallel
2784 
2785 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2786 @*/
2787 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2788 {
2789   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2790 
2791   PetscFunctionBegin;
2792   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2793   if (f) {
2794     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2795   }
2796   PetscFunctionReturn(0);
2797 }
2798 
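/*
   A minimal usage sketch (not compiled into the library; ExampleCSR is a
   hypothetical name, and for simplicity it assumes a run on one process):
   assembling a 2x2-block matrix with bs=2 from block-CSR arrays. Row
   pointers and column indices are in block units; values, when supplied,
   are bs*bs entries per block.
*/
#if 0
static PetscErrorCode ExampleCSR(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Mat            B;
  PetscInt       ii[3],jj[3];

  PetscFunctionBegin;
  ii[0] = 0; ii[1] = 2; ii[2] = 3; /* block row 0 holds 2 blocks, row 1 holds 1 */
  jj[0] = 0; jj[1] = 1; jj[2] = 1; /* global block-column indices, sorted per row */
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr); /* NULL values -> zeros */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
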
2799 EXTERN_C_BEGIN
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2802 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2803 {
2804   Mat_MPIBAIJ    *b;
2805   PetscErrorCode ierr;
2806   PetscInt       i, newbs = PetscAbs(bs);
2807 
2808   PetscFunctionBegin;
2809   if (bs < 0) {
2810     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2811       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2812     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2813     bs   = PetscAbs(bs);
2814   }
2815   if ((d_nnz || o_nnz) && newbs != bs) {
2816     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2817   }
2818   bs = newbs;
2819 
2821   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2822   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2823   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2824   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2825   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2826 
2827   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2828   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2829   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2830   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2831 
2832   if (d_nnz) {
2833     for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2835     }
2836   }
2837   if (o_nnz) {
2838     for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2840     }
2841   }
2842 
2843   b = (Mat_MPIBAIJ*)B->data;
2844   b->bs2 = bs*bs;
2845   b->mbs = B->rmap->n/bs;
2846   b->nbs = B->cmap->n/bs;
2847   b->Mbs = B->rmap->N/bs;
2848   b->Nbs = B->cmap->N/bs;
2849 
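  /* convert the point-row ownership ranges in B->rmap->range to block-row ranges */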
2850   for (i=0; i<=b->size; i++) {
2851     b->rangebs[i] = B->rmap->range[i]/bs;
2852   }
2853   b->rstartbs = B->rmap->rstart/bs;
2854   b->rendbs   = B->rmap->rend/bs;
2855   b->cstartbs = B->cmap->rstart/bs;
2856   b->cendbs   = B->cmap->rend/bs;
2857 
2858   if (!B->preallocated) {
2859     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2860     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2861     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2862     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2863     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2864     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2865     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2866     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2867     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2868   }
2869 
2870   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2871   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2872   B->preallocated = PETSC_TRUE;
2873   PetscFunctionReturn(0);
2874 }
2875 EXTERN_C_END
2876 
2877 EXTERN_C_BEGIN
2878 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2879 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2880 EXTERN_C_END
2881 
2882 
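/*
   MatConvert_MPIBAIJ_MPIAdj below builds the adjacency graph of the block rows:
   for each local block row it merges, in increasing global block-column order,
   the off-diagonal block columns that precede the diagonal block, the columns
   of the diagonal block (omitting the diagonal entry itself), and the remaining
   off-diagonal block columns.
*/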
2883 EXTERN_C_BEGIN
2884 #undef __FUNCT__
2885 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2886 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2887 {
2888   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2889   PetscErrorCode ierr;
2890   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2891   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2892   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2893 
2894   PetscFunctionBegin;
2895   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2896   ii[0] = 0;
2897   CHKMEMQ;
2898   for (i=0; i<M; i++) {
2899     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2900     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2901     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this row of the diagonal block contains the diagonal entry */
2903     for (j=id[i]; j<id[i+1]; j++) {
2904       if (jd[j] == i) {ii[i+1]--;break;}
2905     }
    CHKMEMQ;
2907   }
2908   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2909   cnt = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
2927   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2928   PetscFunctionReturn(0);
2929 }
2930 EXTERN_C_END
2931 
2932 /*MC
2933    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2934 
2935    Options Database Keys:
2936 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2937 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor <fact>
2939 
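   For example (an illustrative command line):

.vb
   mpiexec -n 4 ./myapp -mat_type mpibaij -mat_block_size 3
.ve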
2940   Level: beginner
2941 
.seealso: MatCreateMPIBAIJ()
2943 M*/
2944 
2945 EXTERN_C_BEGIN
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatCreate_MPIBAIJ"
2948 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2949 {
2950   Mat_MPIBAIJ    *b;
2951   PetscErrorCode ierr;
2952   PetscTruth     flg;
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2956   B->data = (void*)b;
2957 
2959   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2960   B->mapping    = 0;
2961   B->assembled  = PETSC_FALSE;
2962 
2963   B->insertmode = NOT_SET_VALUES;
2964   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2965   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2966 
2967   /* build local table of row and column ownerships */
2968   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2969 
2970   /* build cache for off array entries formed */
2971   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2972   b->donotstash  = PETSC_FALSE;
2973   b->colmap      = PETSC_NULL;
2974   b->garray      = PETSC_NULL;
2975   b->roworiented = PETSC_TRUE;
2976 
2977   /* stuff used in block assembly */
2978   b->barray       = 0;
2979 
2980   /* stuff used for matrix vector multiply */
2981   b->lvec         = 0;
2982   b->Mvctx        = 0;
2983 
2984   /* stuff for MatGetRow() */
2985   b->rowindices   = 0;
2986   b->rowvalues    = 0;
2987   b->getrowactive = PETSC_FALSE;
2988 
2989   /* hash table stuff */
2990   b->ht           = 0;
2991   b->hd           = 0;
2992   b->ht_size      = 0;
2993   b->ht_flag      = PETSC_FALSE;
2994   b->ht_fact      = 0;
2995   b->ht_total_ct  = 0;
2996   b->ht_insert_ct = 0;
2997 
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2999     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3000     if (flg) {
3001       PetscReal fact = 1.39;
3002       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3003       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3004       if (fact <= 1.0) fact = 1.39;
3005       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3006       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3007     }
3008   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3009 
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3011                                      "MatConvert_MPIBAIJ_MPIAdj",
3012                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3014                                      "MatStoreValues_MPIBAIJ",
3015                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3017                                      "MatRetrieveValues_MPIBAIJ",
3018                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3020                                      "MatGetDiagonalBlock_MPIBAIJ",
3021                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3023                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3024                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3026 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3027 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3029                                      "MatDiagonalScaleLocal_MPIBAIJ",
3030                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3031   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3032                                      "MatSetHashTableFactor_MPIBAIJ",
3033                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3035   PetscFunctionReturn(0);
3036 }
3037 EXTERN_C_END
3038 
3039 /*MC
3040    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3041 
3042    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3043    and MATMPIBAIJ otherwise.
3044 
3045    Options Database Keys:
3046 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3047 
3048   Level: beginner
3049 
3050 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3051 M*/
3052 
3053 EXTERN_C_BEGIN
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatCreate_BAIJ"
3056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3057 {
3058   PetscErrorCode ierr;
3059   PetscMPIInt    size;
3060 
3061   PetscFunctionBegin;
3062   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3063   if (size == 1) {
3064     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3065   } else {
3066     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3067   }
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3074 /*@C
3075    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3076    (block compressed row).  For good matrix assembly performance
3077    the user should preallocate the matrix storage by setting the parameters
3078    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3079    performance can be increased by more than a factor of 50.
3080 
3081    Collective on Mat
3082 
3083    Input Parameters:
+  B     - the matrix
.  bs    - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local block rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each
           block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local block rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
3096 
3097    If the *_nnz parameter is given then the *_nz parameter is ignored
3098 
3099    Options Database Keys:
3100 +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor <fact>
3102 
3103    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3106 
3107    Storage Information:
3108    For a square global matrix we define each processor's diagonal portion
3109    to be its local rows and the corresponding columns (a square submatrix);
3110    each processor's off-diagonal portion encompasses the remainder of the
3111    local matrix (a rectangular submatrix).
3112 
3113    The user can specify preallocated storage for the diagonal part of
3114    the local submatrix with either d_nz or d_nnz (not both).  Set
3115    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3116    memory allocation.  Likewise, specify preallocated storage for the
3117    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3118 
3119    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3120    the figure below we depict these three local rows and all columns (0-11).
3121 
3122 .vb
3123            0 1 2 3 4 5 6 7 8 9 10 11
3124           -------------------
3125    row 3  |  o o o d d d o o o o o o
3126    row 4  |  o o o d d d o o o o o o
3127    row 5  |  o o o d d d o o o o o o
3128           -------------------
3129 .ve
3130 
3131    Thus, any entries in the d locations are stored in the d (diagonal)
3132    submatrix, and any entries in the o locations are stored in the
3133    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3134    stored simply in the MATSEQBAIJ format for compressed row storage.
3135 
3136    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3137    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3138    In general, for PDE problems in which most nonzeros are near the diagonal,
3139    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3140    or you will get TERRIBLE performance; see the users' manual chapter on
3141    matrices.
3142 
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages containing the string
   malloc to see if additional memory allocation was needed.
3147 
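   A minimal call sketch (the communicator, sizes, and fill estimates below are
   illustrative placeholders):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve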
3148    Level: intermediate
3149 
3150 .keywords: matrix, block, aij, compressed row, sparse, parallel
3151 
3152 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3153 @*/
3154 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3155 {
3156   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3157 
3158   PetscFunctionBegin;
3159   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3160   if (f) {
3161     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3162   }
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatCreateMPIBAIJ"
3168 /*@C
3169    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3170    (block compressed row).  For good matrix assembly performance
3171    the user should preallocate the matrix storage by setting the parameters
3172    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3173    performance can be increased by more than a factor of 50.
3174 
3175    Collective on MPI_Comm
3176 
3177    Input Parameters:
3178 +  comm - MPI communicator
.  bs   - size of block
3180 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3181            This value should be the same as the local size used in creating the
3182            y vector for the matrix-vector product y = Ax.
3183 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3184            This value should be the same as the local size used in creating the
3185            x vector for the matrix-vector product y = Ax.
3186 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3187 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3188 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3189            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3193 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3194            submatrix (same for all local rows).
3195 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3196            off-diagonal portion of the local submatrix (possibly different for
3197            each block row) or PETSC_NULL.
3198 
3199    Output Parameter:
3200 .  A - the matrix
3201 
3202    Options Database Keys:
3203 +   -mat_block_size - size of the blocks to use
3204 -   -mat_use_hash_table <fact>
3205 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3209 
3210    Notes:
3211    If the *_nnz parameter is given then the *_nz parameter is ignored
3212 
   A nonzero block is any block that has 1 or more nonzeros in it
3214 
3215    The user MUST specify either the local or global matrix dimensions
3216    (possibly both).
3217 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3220 
3221    Storage Information:
3222    For a square global matrix we define each processor's diagonal portion
3223    to be its local rows and the corresponding columns (a square submatrix);
3224    each processor's off-diagonal portion encompasses the remainder of the
3225    local matrix (a rectangular submatrix).
3226 
3227    The user can specify preallocated storage for the diagonal part of
3228    the local submatrix with either d_nz or d_nnz (not both).  Set
3229    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3230    memory allocation.  Likewise, specify preallocated storage for the
3231    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3232 
3233    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3234    the figure below we depict these three local rows and all columns (0-11).
3235 
3236 .vb
3237            0 1 2 3 4 5 6 7 8 9 10 11
3238           -------------------
3239    row 3  |  o o o d d d o o o o o o
3240    row 4  |  o o o d d d o o o o o o
3241    row 5  |  o o o d d d o o o o o o
3242           -------------------
3243 .ve
3244 
3245    Thus, any entries in the d locations are stored in the d (diagonal)
3246    submatrix, and any entries in the o locations are stored in the
3247    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3248    stored simply in the MATSEQBAIJ format for compressed row storage.
3249 
3250    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3251    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3252    In general, for PDE problems in which most nonzeros are near the diagonal,
3253    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3254    or you will get TERRIBLE performance; see the users' manual chapter on
3255    matrices.
3256 
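   A minimal call sketch (the communicator, sizes, and fill estimates below are
   illustrative placeholders):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve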
3257    Level: intermediate
3258 
3259 .keywords: matrix, block, aij, compressed row, sparse, parallel
3260 
3261 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3262 @*/
3263 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3264 {
3265   PetscErrorCode ierr;
3266   PetscMPIInt    size;
3267 
3268   PetscFunctionBegin;
3269   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3270   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3271   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3272   if (size > 1) {
3273     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3274     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3275   } else {
3276     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3277     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3278   }
3279   PetscFunctionReturn(0);
3280 }
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3284 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3285 {
3286   Mat            mat;
3287   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3288   PetscErrorCode ierr;
3289   PetscInt       len=0;
3290 
3291   PetscFunctionBegin;
3292   *newmat       = 0;
3293   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3294   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3295   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3296   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3297 
3298   mat->factor       = matin->factor;
3299   mat->preallocated = PETSC_TRUE;
3300   mat->assembled    = PETSC_TRUE;
3301   mat->insertmode   = NOT_SET_VALUES;
3302 
3303   a      = (Mat_MPIBAIJ*)mat->data;
3304   mat->rmap->bs  = matin->rmap->bs;
3305   a->bs2   = oldmat->bs2;
3306   a->mbs   = oldmat->mbs;
3307   a->nbs   = oldmat->nbs;
3308   a->Mbs   = oldmat->Mbs;
3309   a->Nbs   = oldmat->Nbs;
3310 
3311   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3312   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3313 
3314   a->size         = oldmat->size;
3315   a->rank         = oldmat->rank;
3316   a->donotstash   = oldmat->donotstash;
3317   a->roworiented  = oldmat->roworiented;
3318   a->rowindices   = 0;
3319   a->rowvalues    = 0;
3320   a->getrowactive = PETSC_FALSE;
3321   a->barray       = 0;
3322   a->rstartbs     = oldmat->rstartbs;
3323   a->rendbs       = oldmat->rendbs;
3324   a->cstartbs     = oldmat->cstartbs;
3325   a->cendbs       = oldmat->cendbs;
3326 
3327   /* hash table stuff */
3328   a->ht           = 0;
3329   a->hd           = 0;
3330   a->ht_size      = 0;
3331   a->ht_flag      = oldmat->ht_flag;
3332   a->ht_fact      = oldmat->ht_fact;
3333   a->ht_total_ct  = 0;
3334   a->ht_insert_ct = 0;
3335 
3336   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3337   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
3338   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3339   if (oldmat->colmap) {
3340 #if defined (PETSC_USE_CTABLE)
3341   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3342 #else
3343   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3344   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3345   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3346 #endif
3347   } else a->colmap = 0;
3348 
3349   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3350     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3351     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3352     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3353   } else a->garray = 0;
3354 
3355   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3357   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3358   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3359 
3360   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3361   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3362   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3363   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3364   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3365   *newmat = mat;
3366 
3367   PetscFunctionReturn(0);
3368 }
3369 
3370 #include "petscsys.h"
3371 
3372 #undef __FUNCT__
3373 #define __FUNCT__ "MatLoad_MPIBAIJ"
3374 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3375 {
3376   Mat            A;
3377   PetscErrorCode ierr;
3378   int            fd;
3379   PetscInt       i,nz,j,rstart,rend;
3380   PetscScalar    *vals,*buf;
3381   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3382   MPI_Status     status;
3383   PetscMPIInt    rank,size,maxnz;
3384   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3385   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3386   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3387   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3388   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3389   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3390 
3391   PetscFunctionBegin;
3392   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3393     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3394   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3395 
3396   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3397   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3398   if (!rank) {
3399     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3400     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3402   }
3403 
3404   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3405   M = header[1]; N = header[2];
3406 
3407   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3408 
3409   /*
3410      This code adds extra rows to make sure the number of rows is
3411      divisible by the blocksize
3412   */
3413   Mbs        = M/bs;
3414   extra_rows = bs - M + bs*Mbs;
3415   if (extra_rows == bs) extra_rows = 0;
3416   else                  Mbs++;
3417   if (extra_rows && !rank) {
3418     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3419   }
3420 
3421   /* determine ownership of all rows */
3422   mbs        = Mbs/size + ((Mbs % size) > rank);
3423   m          = mbs*bs;
3424   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3425   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3426 
3427   /* process 0 needs enough room for process with most rows */
3428   if (!rank) {
3429     mmax = rowners[1];
3430     for (i=2; i<size; i++) {
3431       mmax = PetscMax(mmax,rowners[i]);
3432     }
3433     mmax*=bs;
3434   } else mmax = m;
3435 
3436   rowners[0] = 0;
3437   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3438   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3439   rstart = rowners[rank];
3440   rend   = rowners[rank+1];
3441 
3442   /* distribute row lengths to all processors */
3443   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3444   if (!rank) {
3445     mend = m;
3446     if (size == 1) mend = mend - extra_rows;
3447     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3448     for (j=mend; j<m; j++) locrowlens[j] = 1;
3449     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3450     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3451     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3452     for (j=0; j<m; j++) {
3453       procsnz[0] += locrowlens[j];
3454     }
3455     for (i=1; i<size; i++) {
3456       mend = browners[i+1] - browners[i];
3457       if (i == size-1) mend = mend - extra_rows;
3458       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3459       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3460       /* calculate the number of nonzeros on each processor */
3461       for (j=0; j<browners[i+1]-browners[i]; j++) {
3462         procsnz[i] += rowlengths[j];
3463       }
3464       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3465     }
3466     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3467   } else {
3468     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3469   }
3470 
3471   if (!rank) {
3472     /* determine max buffer needed and allocate it */
3473     maxnz = procsnz[0];
3474     for (i=1; i<size; i++) {
3475       maxnz = PetscMax(maxnz,procsnz[i]);
3476     }
3477     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3478 
3479     /* read in my part of the matrix column indices  */
3480     nz     = procsnz[0];
3481     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3482     mycols = ibuf;
3483     if (size == 1)  nz -= extra_rows;
3484     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3485     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3486 
    /* read in every other proc's column indices (except the last proc's) and ship them off */
3488     for (i=1; i<size-1; i++) {
3489       nz   = procsnz[i];
3490       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3491       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3492     }
3493     /* read in the stuff for the last proc */
3494     if (size != 1) {
3495       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3496       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3497       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3498       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3499     }
3500     ierr = PetscFree(cols);CHKERRQ(ierr);
3501   } else {
3502     /* determine buffer space needed for message */
3503     nz = 0;
3504     for (i=0; i<m; i++) {
3505       nz += locrowlens[i];
3506     }
3507     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3508     mycols = ibuf;
    /* receive message of column indices */
3510     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3511     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3512     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3513   }
3514 
3515   /* loop over local rows, determining number of off diagonal entries */
3516   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3517   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3518   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3519   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3520   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
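  /*
     mask[] marks block columns already counted in the current block row;
     masked1[] records the marked diagonal-block columns and masked2[] the
     marked off-diagonal-block columns, so the marks can be cleared cheaply
     after each block row.
  */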
3521   rowcount = 0; nzcount = 0;
3522   for (i=0; i<mbs; i++) {
3523     dcount  = 0;
3524     odcount = 0;
3525     for (j=0; j<bs; j++) {
3526       kmax = locrowlens[rowcount];
3527       for (k=0; k<kmax; k++) {
3528         tmp = mycols[nzcount++]/bs;
3529         if (!mask[tmp]) {
3530           mask[tmp] = 1;
3531           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3532           else masked1[dcount++] = tmp;
3533         }
3534       }
3535       rowcount++;
3536     }
3537 
3538     dlens[i]  = dcount;
3539     odlens[i] = odcount;
3540 
3541     /* zero out the mask elements we set */
3542     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3543     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3544   }
3545 
3546   /* create our matrix */
3547   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3548   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
3550   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3551 
3552   if (!rank) {
3553     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3554     /* read in my part of the matrix numerical values  */
3555     nz = procsnz[0];
3556     vals = buf;
3557     mycols = ibuf;
3558     if (size == 1)  nz -= extra_rows;
3559     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3560     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3561 
3562     /* insert into matrix */
3563     jj      = rstart*bs;
3564     for (i=0; i<m; i++) {
3565       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3566       mycols += locrowlens[i];
3567       vals   += locrowlens[i];
3568       jj++;
3569     }
3570     /* read in other processors (except the last one) and ship out */
3571     for (i=1; i<size-1; i++) {
3572       nz   = procsnz[i];
3573       vals = buf;
3574       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3575       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3576     }
3577     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3580       vals = buf;
3581       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3582       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3583       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3584     }
3585     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3586   } else {
3587     /* receive numeric values */
3588     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3589 
3590     /* receive message of values*/
3591     vals   = buf;
3592     mycols = ibuf;
3593     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3594     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3595     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3596 
3597     /* insert into matrix */
3598     jj      = rstart*bs;
3599     for (i=0; i<m; i++) {
3600       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3601       mycols += locrowlens[i];
3602       vals   += locrowlens[i];
3603       jj++;
3604     }
3605   }
3606   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3607   ierr = PetscFree(buf);CHKERRQ(ierr);
3608   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3609   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3610   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3611   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3612   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3614 
3615   *newmat = A;
3616   PetscFunctionReturn(0);
3617 }
3618 
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3621 /*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table factor (should be greater than 1.0)
3629 
3630    Level: advanced
3631 
3632   Notes:
3633    This can also be set by the command line option: -mat_use_hash_table <fact>
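
   A minimal call sketch (the factor value is illustrative):

.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve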
3634 
3635 .keywords: matrix, hashtable, factor, HT
3636 
3637 .seealso: MatSetOption()
3638 @*/
3639 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3640 {
3641   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3642 
3643   PetscFunctionBegin;
3644   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3645   if (f) {
3646     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3647   }
3648   PetscFunctionReturn(0);
3649 }
3650 
3651 EXTERN_C_BEGIN
3652 #undef __FUNCT__
3653 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3654 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3655 {
3656   Mat_MPIBAIJ *baij;
3657 
3658   PetscFunctionBegin;
3659   baij = (Mat_MPIBAIJ*)mat->data;
3660   baij->ht_fact = fact;
3661   PetscFunctionReturn(0);
3662 }
3663 EXTERN_C_END
3664 
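/*
   MatMPIBAIJGetSeqBAIJ - gives access to the diagonal (Ad) and off-diagonal (Ao)
   SeqBAIJ pieces of an MPIBAIJ matrix, and to colmap, which maps the local block
   columns of Ao to global block columns.
*/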
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3668 {
3669   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3670   PetscFunctionBegin;
3671   *Ad     = a->A;
3672   *Ao     = a->B;
3673   *colmap = a->garray;
3674   PetscFunctionReturn(0);
3675 }
3676 
3677 /*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
3679 */
3680 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3681 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3682 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3683 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3684 #endif
3685 
3686 #undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
3688 /*@C
3689   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3690 
3691   Collective on Mat
3692 
3693   Input Parameters:
3694 + mat - the matrix
3695 . min - number of input rows
3696 . im - input rows
3697 . nin - number of input columns
3698 . in - input columns
3699 . v - numerical values input
3700 - addvin - INSERT_VALUES or ADD_VALUES
3701 
  Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
3703 
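  Calling this routine is equivalent to the following C call (a sketch; v must
  use the blocked layout described in MatSetValuesBlocked()):

.vb
   ierr = MatSetValuesBlocked(*matin,*min,im,*nin,in,v,*addvin);CHKERRQ(ierr);
.ve
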
3704   Level: advanced
3705 
3706 .seealso:   MatSetValuesBlocked()
3707 @*/
3708 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3709 {
3710   /* convert input arguments to C version */
3711   Mat             mat = *matin;
3712   PetscInt        m = *min, n = *nin;
3713   InsertMode      addv = *addvin;
3714 
3715   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3716   const MatScalar *value;
3717   MatScalar       *barray=baij->barray;
3718   PetscTruth      roworiented = baij->roworiented;
3719   PetscErrorCode  ierr;
3720   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3721   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3722   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3723 
3724   PetscFunctionBegin;
3725   /* tasks normally handled by MatSetValuesBlocked() */
3726   if (mat->insertmode == NOT_SET_VALUES) {
3727     mat->insertmode = addv;
3728   }
3729 #if defined(PETSC_USE_DEBUG)
3730   else if (mat->insertmode != addv) {
3731     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3732   }
3733   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3734 #endif
3735   if (mat->assembled) {
3736     mat->was_assembled = PETSC_TRUE;
3737     mat->assembled     = PETSC_FALSE;
3738   }
3739   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3740 
  if (!barray) {
3743     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3744     baij->barray = barray;
3745   }
3746 
3747   if (roworiented) {
3748     stepval = (n-1)*bs;
3749   } else {
3750     stepval = (m-1)*bs;
3751   }
3752   for (i=0; i<m; i++) {
3753     if (im[i] < 0) continue;
3754 #if defined(PETSC_USE_DEBUG)
3755     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3756 #endif
3757     if (im[i] >= rstart && im[i] < rend) {
3758       row = im[i] - rstart;
3759       for (j=0; j<n; j++) {
        /* if n == 1 (row oriented) or m == 1 (column oriented), v already holds each block contiguously and no copy is required */
3761         if ((roworiented) && (n == 1)) {
3762           barray = (MatScalar*)v + i*bs2;
3763         } else if((!roworiented) && (m == 1)) {
3764           barray = (MatScalar*)v + j*bs2;
3765         } else { /* Here a copy is required */
3766           if (roworiented) {
3767             value = v + i*(stepval+bs)*bs + j*bs;
3768           } else {
3769             value = v + j*(stepval+bs)*bs + i*bs;
3770           }
3771           for (ii=0; ii<bs; ii++,value+=stepval) {
3772             for (jj=0; jj<bs; jj++) {
3773               *barray++  = *value++;
3774             }
3775           }
          barray -= bs2;
3777         }
3778 
        if (in[j] >= cstart && in[j] < cend) {
3780           col  = in[j] - cstart;
3781           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3782         }
3783         else if (in[j] < 0) continue;
3784 #if defined(PETSC_USE_DEBUG)
3785         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3786 #endif
3787         else {
3788           if (mat->was_assembled) {
3789             if (!baij->colmap) {
3790               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3791             }
3792 
3793 #if defined(PETSC_USE_DEBUG)
3794 #if defined (PETSC_USE_CTABLE)
3795             { PetscInt data;
3796               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3797               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3798             }
3799 #else
3800             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3801 #endif
3802 #endif
3803 #if defined (PETSC_USE_CTABLE)
3804 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3805             col  = (col - 1)/bs;
3806 #else
3807             col = (baij->colmap[in[j]] - 1)/bs;
3808 #endif
3809             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3810               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3811               col =  in[j];
3812             }
3813           }
3814           else col = in[j];
3815           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3816         }
3817       }
3818     } else {
3819       if (!baij->donotstash) {
3820         if (roworiented) {
3821           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3822         } else {
3823           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3824         }
3825       }
3826     }
3827   }
3828 
3829   /* task normally handled by MatSetValuesBlocked() */
3830   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3831   PetscFunctionReturn(0);
3832 }
3833