xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5a0ac93ed2df1a79ae554899664ed51ecbf5ef94)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27 
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap.n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non scable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
110 #define CHUNKSIZE  10
111 
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113 { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118       bcol = col/bs; \
119       ridx = row % bs; cidx = col % bs; \
120       low = 0; high = nrow; \
121       while (high-low > 3) { \
122         t = (low+high)/2; \
123         if (rp[t] > bcol) high = t; \
124         else              low  = t; \
125       } \
126       for (_i=low; _i<high; _i++) { \
127         if (rp[_i] > bcol) break; \
128         if (rp[_i] == bcol) { \
129           bap  = ap +  bs2*_i + bs*cidx + ridx; \
130           if (addv == ADD_VALUES) *bap += value;  \
131           else                    *bap  = value;  \
132           goto a_noinsert; \
133         } \
134       } \
135       if (a->nonew == 1) goto a_noinsert; \
136       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138       N = nrow++ - 1;  \
139       /* shift up all the later entries in this row */ \
140       for (ii=N; ii>=_i; ii--) { \
141         rp[ii+1] = rp[ii]; \
142         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143       } \
144       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145       rp[_i]                      = bcol;  \
146       ap[bs2*_i + bs*cidx + ridx] = value;  \
147       a_noinsert:; \
148     ailen[brow] = nrow; \
149 }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152 { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156       bcol = col/bs; \
157       ridx = row % bs; cidx = col % bs; \
158       low = 0; high = nrow; \
159       while (high-low > 3) { \
160         t = (low+high)/2; \
161         if (rp[t] > bcol) high = t; \
162         else              low  = t; \
163       } \
164       for (_i=low; _i<high; _i++) { \
165         if (rp[_i] > bcol) break; \
166         if (rp[_i] == bcol) { \
167           bap  = ap +  bs2*_i + bs*cidx + ridx; \
168           if (addv == ADD_VALUES) *bap += value;  \
169           else                    *bap  = value;  \
170           goto b_noinsert; \
171         } \
172       } \
173       if (b->nonew == 1) goto b_noinsert; \
174       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176       CHKMEMQ;\
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for (ii=N; ii>=_i; ii--) { \
180         rp[ii+1] = rp[ii]; \
181         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182       } \
183       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscTruth     roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap.rstart;
200   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
201   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
202 
203   /* Some Variables required in the macro */
204   Mat            A = baij->A;
205   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar      *aa=a->a;
208 
209   Mat            B = baij->B;
210   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar      *ba=b->a;
213 
214   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar      *ap,*bap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_DEBUG)
222     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
223 #endif
224     if (im[i] >= rstart_orig && im[i] < rend_orig) {
225       row = im[i] - rstart_orig;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart_orig && in[j] < cend_orig){
228           col = in[j] - cstart_orig;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_DEBUG)
234         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!baij->colmap) {
239               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
243             col  = col - 1;
244 #else
245             col = baij->colmap[in[j]/bs] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
248               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
251               B = baij->B;
252               b = (Mat_SeqBAIJ*)(B)->data;
253               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
254               ba=b->a;
255             } else col += in[j]%bs;
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If NumCol = 1 then a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + i*(stepval+bs)*bs + j*bs;
315           } else {
316             value = v + j*(stepval+bs)*bs + i*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=stepval) {
319             for (jj=0; jj<bs; jj++) {
320               *barray++  = *value++;
321             }
322           }
323           barray -=bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscTruth     roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap.rstart;
391   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400 
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
405 #endif
406       row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up PetscInto the Hash Table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscTruth        roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505        if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
585   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620  PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = sqrt(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
663       tmp2 = tmp + mat->cmap.N;
664       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap.N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree(tmp);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else {
722       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723     }
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 /*
729   Creates the hash table, and sets the table
730   This table is created only once.
731   If new entried need to be added to the matrix
732   then the hash table has to be destroyed and
733   recreated.
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat            A = baij->A,B=baij->B;
741   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743   PetscErrorCode ierr;
744   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
745   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746   PetscInt       *HT,key;
747   MatScalar      **HD;
748   PetscReal      tmp;
749 #if defined(PETSC_USE_INFO)
750   PetscInt       ct=0,max=0;
751 #endif
752 
753   PetscFunctionBegin;
754   baij->ht_size=(PetscInt)(factor*nz);
755   size = baij->ht_size;
756 
757   if (baij->ht) {
758     PetscFunctionReturn(0);
759   }
760 
761   /* Allocate Memory for Hash Table */
762   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
763   baij->ht = (PetscInt*)(baij->hd + size);
764   HD       = baij->hd;
765   HT       = baij->ht;
766 
767 
768   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
769 
770 
771   /* Loop Over A */
772   for (i=0; i<a->mbs; i++) {
773     for (j=ai[i]; j<ai[i+1]; j++) {
774       row = i+rstart;
775       col = aj[j]+cstart;
776 
777       key = row*Nbs + col + 1;
778       h1  = HASH(size,key,tmp);
779       for (k=0; k<size; k++){
780         if (!HT[(h1+k)%size]) {
781           HT[(h1+k)%size] = key;
782           HD[(h1+k)%size] = a->a + j*bs2;
783           break;
784 #if defined(PETSC_USE_INFO)
785         } else {
786           ct++;
787 #endif
788         }
789       }
790 #if defined(PETSC_USE_INFO)
791       if (k> max) max = k;
792 #endif
793     }
794   }
795   /* Loop Over B */
796   for (i=0; i<b->mbs; i++) {
797     for (j=bi[i]; j<bi[i+1]; j++) {
798       row = i+rstart;
799       col = garray[bj[j]];
800       key = row*Nbs + col + 1;
801       h1  = HASH(size,key,tmp);
802       for (k=0; k<size; k++){
803         if (!HT[(h1+k)%size]) {
804           HT[(h1+k)%size] = key;
805           HD[(h1+k)%size] = b->a + j*bs2;
806           break;
807 #if defined(PETSC_USE_INFO)
808         } else {
809           ct++;
810 #endif
811         }
812       }
813 #if defined(PETSC_USE_INFO)
814       if (k> max) max = k;
815 #endif
816     }
817   }
818 
819   /* Print Summary */
820 #if defined(PETSC_USE_INFO)
821   for (i=0,j=0; i<size; i++) {
822     if (HT[i]) {j++;}
823   }
824   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
825 #endif
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
832 {
833   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
834   PetscErrorCode ierr;
835   PetscInt       nstash,reallocs;
836   InsertMode     addv;
837 
838   PetscFunctionBegin;
839   if (baij->donotstash) {
840     PetscFunctionReturn(0);
841   }
842 
843   /* make sure all processors are either in INSERTMODE or ADDMODE */
844   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
845   if (addv == (ADD_VALUES|INSERT_VALUES)) {
846     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
847   }
848   mat->insertmode = addv; /* in case this processor had no cache */
849 
850   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
851   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
852   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
853   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
854   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
855   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
862 {
863   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
864   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
865   PetscErrorCode ierr;
866   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
867   PetscInt       *row,*col;
868   PetscTruth     r1,r2,r3,other_disassembled;
869   MatScalar      *val;
870   InsertMode     addv = mat->insertmode;
871   PetscMPIInt    n;
872 
873   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
874   PetscFunctionBegin;
875   if (!baij->donotstash) {
876     while (1) {
877       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
878       if (!flg) break;
879 
880       for (i=0; i<n;) {
881         /* Now identify the consecutive vals belonging to the same row */
882         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
883         if (j < n) ncols = j-i;
884         else       ncols = n-i;
885         /* Now assemble all these values with a single function call */
886         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
887         i = j;
888       }
889     }
890     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
891     /* Now process the block-stash. Since the values are stashed column-oriented,
892        set the roworiented flag to column oriented, and after MatSetValues()
893        restore the original flags */
894     r1 = baij->roworiented;
895     r2 = a->roworiented;
896     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
897     baij->roworiented = PETSC_FALSE;
898     a->roworiented    = PETSC_FALSE;
899     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
900     while (1) {
901       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
902       if (!flg) break;
903 
904       for (i=0; i<n;) {
905         /* Now identify the consecutive vals belonging to the same row */
906         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
907         if (j < n) ncols = j-i;
908         else       ncols = n-i;
909         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
910         i = j;
911       }
912     }
913     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
914     baij->roworiented = r1;
915     a->roworiented    = r2;
916     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
917   }
918 
919   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
920   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
921 
922   /* determine if any processor has disassembled, if so we must
923      also disassemble ourselfs, in order that we may reassemble. */
924   /*
925      if nonzero structure of submatrix B cannot change then we know that
926      no processor disassembled thus we can skip this stuff
927   */
928   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
929     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
930     if (mat->was_assembled && !other_disassembled) {
931       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
932     }
933   }
934 
935   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
936     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
937   }
938   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
939   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
940   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
941 
942 #if defined(PETSC_USE_INFO)
943   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
944     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
945     baij->ht_total_ct  = 0;
946     baij->ht_insert_ct = 0;
947   }
948 #endif
949   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
950     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
951     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
952     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
953   }
954 
955   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
956   baij->rowvalues = 0;
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
963 {
964   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
965   PetscErrorCode    ierr;
966   PetscMPIInt       size = baij->size,rank = baij->rank;
967   PetscInt          bs = mat->rmap.bs;
968   PetscTruth        iascii,isdraw;
969   PetscViewer       sviewer;
970   PetscViewerFormat format;
971 
972   PetscFunctionBegin;
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
974   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
975   if (iascii) {
976     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
977     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
978       MatInfo info;
979       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
980       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
981       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
982               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
983               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
984       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
985       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
986       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
987       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
988       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
989       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
990       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
991       PetscFunctionReturn(0);
992     } else if (format == PETSC_VIEWER_ASCII_INFO) {
993       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
996       PetscFunctionReturn(0);
997     }
998   }
999 
1000   if (isdraw) {
1001     PetscDraw       draw;
1002     PetscTruth isnull;
1003     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1004     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1005   }
1006 
1007   if (size == 1) {
1008     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1009     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1010   } else {
1011     /* assemble the entire matrix onto first processor. */
1012     Mat         A;
1013     Mat_SeqBAIJ *Aloc;
1014     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1015     MatScalar   *a;
1016 
1017     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1018     /* Perhaps this should be the type of mat? */
1019     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1020     if (!rank) {
1021       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1022     } else {
1023       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1024     }
1025     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1026     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1027     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1028 
1029     /* copy over the A part */
1030     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1031     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1032     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1033 
1034     for (i=0; i<mbs; i++) {
1035       rvals[0] = bs*(baij->rstartbs + i);
1036       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1037       for (j=ai[i]; j<ai[i+1]; j++) {
1038         col = (baij->cstartbs+aj[j])*bs;
1039         for (k=0; k<bs; k++) {
1040           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1041           col++; a += bs;
1042         }
1043       }
1044     }
1045     /* copy over the B part */
1046     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1047     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1048     for (i=0; i<mbs; i++) {
1049       rvals[0] = bs*(baij->rstartbs + i);
1050       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1051       for (j=ai[i]; j<ai[i+1]; j++) {
1052         col = baij->garray[aj[j]]*bs;
1053         for (k=0; k<bs; k++) {
1054           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1055           col++; a += bs;
1056         }
1057       }
1058     }
1059     ierr = PetscFree(rvals);CHKERRQ(ierr);
1060     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062     /*
1063        Everyone has to call to draw the matrix since the graphics waits are
1064        synchronized across all processors that share the PetscDraw object
1065     */
1066     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     if (!rank) {
1068       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1069       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1070     }
1071     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1072     ierr = MatDestroy(A);CHKERRQ(ierr);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatView_MPIBAIJ"
1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1080 {
1081   PetscErrorCode ierr;
1082   PetscTruth     iascii,isdraw,issocket,isbinary;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1089   if (iascii || isdraw || issocket || isbinary) {
1090     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1091   } else {
1092     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1100 {
1101   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1112 #if defined (PETSC_USE_CTABLE)
1113   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1114 #else
1115   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1118   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1119   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1120   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1124   ierr = PetscFree(baij);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatMult_MPIBAIJ"
1139 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1140 {
1141   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1142   PetscErrorCode ierr;
1143   PetscInt       nt;
1144 
1145   PetscFunctionBegin;
1146   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1147   if (nt != A->cmap.n) {
1148     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1149   }
1150   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1151   if (nt != A->rmap.n) {
1152     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1153   }
1154   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1156   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1163 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1171   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1172   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1178 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1181   PetscErrorCode ierr;
1182   PetscTruth     merged;
1183 
1184   PetscFunctionBegin;
1185   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   if (!merged) {
1189     /* send it on its way */
1190     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191     /* do local part */
1192     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1193     /* receive remote parts: note this assumes the values are not actually */
1194     /* inserted in yy until the next line */
1195     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1196   } else {
1197     /* do local part */
1198     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1199     /* send it on its way */
1200     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1201     /* values actually were received in the Begin() but we need to call this nop */
1202     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   /* do nondiagonal part */
1216   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1217   /* send it on its way */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   /* do local part */
1220   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1221   /* receive remote parts: note this assumes the values are not actually */
1222   /* inserted in yy until the next line, which is true for my implementation*/
1223   /* but is not perhaps always true. */
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatScale_MPIBAIJ"
1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1248 {
1249   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1254   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1278     PetscInt     max = 1,mbs = mat->mbs,tmp;
1279     for (i=0; i<mbs; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1285   }
1286 
1287   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1288   lrow = row - brstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]/bs] < cstart)
1321               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!baij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1349   }
1350   baij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1357 {
1358   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1363   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1372   Mat            A = a->A,B = a->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = (PetscReal)matin->rmap.bs;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else {
1405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1406   }
1407   info->rows_global       = (PetscReal)A->rmap.N;
1408   info->columns_global    = (PetscReal)A->cmap.N;
1409   info->rows_local        = (PetscReal)A->rmap.N;
1410   info->columns_local     = (PetscReal)A->cmap.N;
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1419 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1420 {
1421   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   switch (op) {
1426   case MAT_NEW_NONZERO_LOCATIONS:
1427   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1428   case MAT_KEEP_ZEROED_ROWS:
1429   case MAT_NEW_NONZERO_LOCATION_ERR:
1430     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1431     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1432     break;
1433   case MAT_ROW_ORIENTED:
1434     a->roworiented = flg;
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_NEW_DIAGONALS:
1439     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1440     break;
1441   case MAT_IGNORE_OFF_PROC_ENTRIES:
1442     a->donotstash = flg;
1443     break;
1444   case MAT_USE_HASH_TABLE:
1445     a->ht_flag = flg;
1446     break;
1447   case MAT_SYMMETRIC:
1448   case MAT_STRUCTURALLY_SYMMETRIC:
1449   case MAT_HERMITIAN:
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1461 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1462 {
1463   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1464   Mat_SeqBAIJ    *Aloc;
1465   Mat            B;
1466   PetscErrorCode ierr;
1467   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1468   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1469   MatScalar      *a;
1470 
1471   PetscFunctionBegin;
1472   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1473   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1474     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1475     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1476     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1477     ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1478   } else {
1479     B = *matout;
1480   }
1481 
1482   /* copy over the A part */
1483   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1484   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1485   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1486 
1487   for (i=0; i<mbs; i++) {
1488     rvals[0] = bs*(baij->rstartbs + i);
1489     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1490     for (j=ai[i]; j<ai[i+1]; j++) {
1491       col = (baij->cstartbs+aj[j])*bs;
1492       for (k=0; k<bs; k++) {
1493         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1494         col++; a += bs;
1495       }
1496     }
1497   }
1498   /* copy over the B part */
1499   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1500   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1501   for (i=0; i<mbs; i++) {
1502     rvals[0] = bs*(baij->rstartbs + i);
1503     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1504     for (j=ai[i]; j<ai[i+1]; j++) {
1505       col = baij->garray[aj[j]]*bs;
1506       for (k=0; k<bs; k++) {
1507         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1508         col++; a += bs;
1509       }
1510     }
1511   }
1512   ierr = PetscFree(rvals);CHKERRQ(ierr);
1513   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1514   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1515 
1516   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1517     *matout = B;
1518   } else {
1519     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1526 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1527 {
1528   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1529   Mat            a = baij->A,b = baij->B;
1530   PetscErrorCode ierr;
1531   PetscInt       s1,s2,s3;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1535   if (rr) {
1536     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1537     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1538     /* Overlap communication with computation. */
1539     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540   }
1541   if (ll) {
1542     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1543     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1544     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1545   }
1546   /* scale  the diagonal block */
1547   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1548 
1549   if (rr) {
1550     /* Do a scatter end and then right scale the off-diagonal block */
1551     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1553   }
1554 
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1560 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1561 {
1562   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1563   PetscErrorCode ierr;
1564   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1565   PetscInt       i,*owners = A->rmap.range;
1566   PetscInt       *nprocs,j,idx,nsends,row;
1567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1568   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1569   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1570   MPI_Comm       comm = ((PetscObject)A)->comm;
1571   MPI_Request    *send_waits,*recv_waits;
1572   MPI_Status     recv_status,*send_status;
1573 #if defined(PETSC_DEBUG)
1574   PetscTruth     found = PETSC_FALSE;
1575 #endif
1576 
1577   PetscFunctionBegin;
1578   /*  first count number of contributors to each processor */
1579   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1580   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1581   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1582   j = 0;
1583   for (i=0; i<N; i++) {
1584     if (lastidx > (idx = rows[i])) j = 0;
1585     lastidx = idx;
1586     for (; j<size; j++) {
1587       if (idx >= owners[j] && idx < owners[j+1]) {
1588         nprocs[2*j]++;
1589         nprocs[2*j+1] = 1;
1590         owner[i] = j;
1591 #if defined(PETSC_DEBUG)
1592         found = PETSC_TRUE;
1593 #endif
1594         break;
1595       }
1596     }
1597 #if defined(PETSC_DEBUG)
1598     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1599     found = PETSC_FALSE;
1600 #endif
1601   }
1602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1603 
1604   /* inform other processors of number of messages and max length*/
1605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1606 
1607   /* post receives:   */
1608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1610   for (i=0; i<nrecvs; i++) {
1611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1612   }
1613 
1614   /* do sends:
1615      1) starts[i] gives the starting index in svalues for stuff going to
1616      the ith processor
1617   */
1618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1621   starts[0]  = 0;
1622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1623   for (i=0; i<N; i++) {
1624     svalues[starts[owner[i]]++] = rows[i];
1625   }
1626 
1627   starts[0] = 0;
1628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1629   count = 0;
1630   for (i=0; i<size; i++) {
1631     if (nprocs[2*i+1]) {
1632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1633     }
1634   }
1635   ierr = PetscFree(starts);CHKERRQ(ierr);
1636 
1637   base = owners[rank];
1638 
1639   /*  wait on receives */
1640   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1641   source = lens + nrecvs;
1642   count  = nrecvs; slen = 0;
1643   while (count) {
1644     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1645     /* unpack receives into our local space */
1646     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1647     source[imdex]  = recv_status.MPI_SOURCE;
1648     lens[imdex]    = n;
1649     slen          += n;
1650     count--;
1651   }
1652   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1653 
1654   /* move the data into the send scatter */
1655   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1656   count = 0;
1657   for (i=0; i<nrecvs; i++) {
1658     values = rvalues + i*nmax;
1659     for (j=0; j<lens[i]; j++) {
1660       lrows[count++] = values[j] - base;
1661     }
1662   }
1663   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1664   ierr = PetscFree(lens);CHKERRQ(ierr);
1665   ierr = PetscFree(owner);CHKERRQ(ierr);
1666   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1667 
1668   /* actually zap the local rows */
1669   /*
1670         Zero the required rows. If the "diagonal block" of the matrix
1671      is square and the user wishes to set the diagonal we use separate
1672      code so that MatSetValues() is not called for each diagonal allocating
1673      new memory, thus calling lots of mallocs and slowing things down.
1674 
1675        Contributed by: Matthew Knepley
1676   */
1677   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1678   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1679   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1680     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1681   } else if (diag != 0.0) {
1682     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1683     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1684       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1686     }
1687     for (i=0; i<slen; i++) {
1688       row  = lrows[i] + rstart_bs;
1689       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1690     }
1691     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   } else {
1694     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1695   }
1696 
1697   ierr = PetscFree(lrows);CHKERRQ(ierr);
1698 
1699   /* wait on sends */
1700   if (nsends) {
1701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1703     ierr = PetscFree(send_status);CHKERRQ(ierr);
1704   }
1705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1706   ierr = PetscFree(svalues);CHKERRQ(ierr);
1707 
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1713 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1714 {
1715   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatEqual_MPIBAIJ"
1727 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1728 {
1729   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1730   Mat            a,b,c,d;
1731   PetscTruth     flg;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   a = matA->A; b = matA->B;
1736   c = matB->A; d = matB->B;
1737 
1738   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1739   if (flg) {
1740     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1741   }
1742   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatCopy_MPIBAIJ"
1748 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1749 {
1750   PetscErrorCode ierr;
1751   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1752   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1753 
1754   PetscFunctionBegin;
1755   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1756   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1757     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1758   } else {
1759     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1760     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1767 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1768 {
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #include "petscblaslapack.h"
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1779 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1780 {
1781   PetscErrorCode ierr;
1782   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1783   PetscBLASInt   bnz,one=1;
1784   Mat_SeqBAIJ    *x,*y;
1785 
1786   PetscFunctionBegin;
1787   if (str == SAME_NONZERO_PATTERN) {
1788     PetscScalar alpha = a;
1789     x = (Mat_SeqBAIJ *)xx->A->data;
1790     y = (Mat_SeqBAIJ *)yy->A->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793     x = (Mat_SeqBAIJ *)xx->B->data;
1794     y = (Mat_SeqBAIJ *)yy->B->data;
1795     bnz = PetscBLASIntCast(x->nz);
1796     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1797   } else {
1798     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1805 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1806 {
1807   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1812   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1818 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1819 {
1820   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1825   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 /* -------------------------------------------------------------------*/
1830 static struct _MatOps MatOps_Values = {
1831        MatSetValues_MPIBAIJ,
1832        MatGetRow_MPIBAIJ,
1833        MatRestoreRow_MPIBAIJ,
1834        MatMult_MPIBAIJ,
1835 /* 4*/ MatMultAdd_MPIBAIJ,
1836        MatMultTranspose_MPIBAIJ,
1837        MatMultTransposeAdd_MPIBAIJ,
1838        0,
1839        0,
1840        0,
1841 /*10*/ 0,
1842        0,
1843        0,
1844        0,
1845        MatTranspose_MPIBAIJ,
1846 /*15*/ MatGetInfo_MPIBAIJ,
1847        MatEqual_MPIBAIJ,
1848        MatGetDiagonal_MPIBAIJ,
1849        MatDiagonalScale_MPIBAIJ,
1850        MatNorm_MPIBAIJ,
1851 /*20*/ MatAssemblyBegin_MPIBAIJ,
1852        MatAssemblyEnd_MPIBAIJ,
1853        0,
1854        MatSetOption_MPIBAIJ,
1855        MatZeroEntries_MPIBAIJ,
1856 /*25*/ MatZeroRows_MPIBAIJ,
1857        0,
1858        0,
1859        0,
1860        0,
1861 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1862        0,
1863        0,
1864        0,
1865        0,
1866 /*35*/ MatDuplicate_MPIBAIJ,
1867        0,
1868        0,
1869        0,
1870        0,
1871 /*40*/ MatAXPY_MPIBAIJ,
1872        MatGetSubMatrices_MPIBAIJ,
1873        MatIncreaseOverlap_MPIBAIJ,
1874        MatGetValues_MPIBAIJ,
1875        MatCopy_MPIBAIJ,
1876 /*45*/ 0,
1877        MatScale_MPIBAIJ,
1878        0,
1879        0,
1880        0,
1881 /*50*/ 0,
1882        0,
1883        0,
1884        0,
1885        0,
1886 /*55*/ 0,
1887        0,
1888        MatSetUnfactored_MPIBAIJ,
1889        0,
1890        MatSetValuesBlocked_MPIBAIJ,
1891 /*60*/ 0,
1892        MatDestroy_MPIBAIJ,
1893        MatView_MPIBAIJ,
1894        0,
1895        0,
1896 /*65*/ 0,
1897        0,
1898        0,
1899        0,
1900        0,
1901 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
1902        0,
1903        0,
1904        0,
1905        0,
1906 /*75*/ 0,
1907        0,
1908        0,
1909        0,
1910        0,
1911 /*80*/ 0,
1912        0,
1913        0,
1914        0,
1915        MatLoad_MPIBAIJ,
1916 /*85*/ 0,
1917        0,
1918        0,
1919        0,
1920        0,
1921 /*90*/ 0,
1922        0,
1923        0,
1924        0,
1925        0,
1926 /*95*/ 0,
1927        0,
1928        0,
1929        0,
1930        0,
1931 /*100*/0,
1932        0,
1933        0,
1934        0,
1935        0,
1936 /*105*/0,
1937        MatRealPart_MPIBAIJ,
1938        MatImaginaryPart_MPIBAIJ};
1939 
1940 
1941 EXTERN_C_BEGIN
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1944 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1945 {
1946   PetscFunctionBegin;
1947   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1948   *iscopy = PETSC_FALSE;
1949   PetscFunctionReturn(0);
1950 }
1951 EXTERN_C_END
1952 
1953 EXTERN_C_BEGIN
1954 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
1955 EXTERN_C_END
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
1959 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
1960 {
1961   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
1962   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
1963   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
1964   const PetscInt *JJ;
1965   PetscScalar    *values;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969 #if defined(PETSC_OPT_g)
1970   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
1971 #endif
1972   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1973   o_nnz = d_nnz + m;
1974 
1975   for (i=0; i<m; i++) {
1976     nnz     = Ii[i+1]- Ii[i];
1977     JJ      = J + Ii[i];
1978     nnz_max = PetscMax(nnz_max,nnz);
1979 #if defined(PETSC_OPT_g)
1980     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
1981 #endif
1982     for (j=0; j<nnz; j++) {
1983       if (*JJ >= cstart) break;
1984       JJ++;
1985     }
1986     d = 0;
1987     for (; j<nnz; j++) {
1988       if (*JJ++ >= cend) break;
1989       d++;
1990     }
1991     d_nnz[i] = d;
1992     o_nnz[i] = nnz - d;
1993   }
1994   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1995   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1996 
1997   if (v) values = (PetscScalar*)v;
1998   else {
1999     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2000     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2001   }
2002 
2003   for (i=0; i<m; i++) {
2004     ii   = i + rstart;
2005     nnz  = Ii[i+1]- Ii[i];
2006     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2007   }
2008   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2009   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2010 
2011   if (!v) {
2012     ierr = PetscFree(values);CHKERRQ(ierr);
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2019 /*@C
2020    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2021    (the default parallel PETSc format).
2022 
2023    Collective on MPI_Comm
2024 
2025    Input Parameters:
2026 +  A - the matrix
2027 .  i - the indices into j for the start of each local row (starts with zero)
2028 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2029 -  v - optional values in the matrix
2030 
2031    Level: developer
2032 
2033 .keywords: matrix, aij, compressed row, sparse, parallel
2034 
2035 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2036 @*/
2037 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2038 {
2039   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2040 
2041   PetscFunctionBegin;
2042   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2043   if (f) {
2044     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2045   }
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 EXTERN_C_BEGIN
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2052 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2053 {
2054   Mat_MPIBAIJ    *b;
2055   PetscErrorCode ierr;
2056   PetscInt       i;
2057 
2058   PetscFunctionBegin;
2059   B->preallocated = PETSC_TRUE;
2060   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2061     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2062   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2063 
2064   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2065   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2066   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2067   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2068   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2069 
2070   B->rmap.bs  = bs;
2071   B->cmap.bs  = bs;
2072   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2073   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2074 
2075   if (d_nnz) {
2076     for (i=0; i<B->rmap.n/bs; i++) {
2077       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2078     }
2079   }
2080   if (o_nnz) {
2081     for (i=0; i<B->rmap.n/bs; i++) {
2082       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2083     }
2084   }
2085 
2086   b = (Mat_MPIBAIJ*)B->data;
2087   b->bs2 = bs*bs;
2088   b->mbs = B->rmap.n/bs;
2089   b->nbs = B->cmap.n/bs;
2090   b->Mbs = B->rmap.N/bs;
2091   b->Nbs = B->cmap.N/bs;
2092 
2093   for (i=0; i<=b->size; i++) {
2094     b->rangebs[i] = B->rmap.range[i]/bs;
2095   }
2096   b->rstartbs = B->rmap.rstart/bs;
2097   b->rendbs   = B->rmap.rend/bs;
2098   b->cstartbs = B->cmap.rstart/bs;
2099   b->cendbs   = B->cmap.rend/bs;
2100 
2101   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2102   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2103   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2104   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2105   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2106   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2107   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2108   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2109   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2110   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2111 
2112   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2113 
2114   PetscFunctionReturn(0);
2115 }
2116 EXTERN_C_END
2117 
2118 EXTERN_C_BEGIN
2119 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2120 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2121 EXTERN_C_END
2122 
2123 /*MC
2124    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2125 
2126    Options Database Keys:
2127 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2128 . -mat_block_size <bs> - set the blocksize used to store the matrix
2129 - -mat_use_hash_table <fact>
2130 
2131   Level: beginner
2132 
2133 .seealso: MatCreateMPIBAIJ
2134 M*/
2135 
2136 EXTERN_C_BEGIN
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatCreate_MPIBAIJ"
2139 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2140 {
2141   Mat_MPIBAIJ    *b;
2142   PetscErrorCode ierr;
2143   PetscTruth     flg;
2144 
2145   PetscFunctionBegin;
2146   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2147   B->data = (void*)b;
2148 
2149 
2150   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2151   B->mapping    = 0;
2152   B->factor     = 0;
2153   B->assembled  = PETSC_FALSE;
2154 
2155   B->insertmode = NOT_SET_VALUES;
2156   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2157   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2158 
2159   /* build local table of row and column ownerships */
2160   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2161 
2162   /* build cache for off array entries formed */
2163   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2164   b->donotstash  = PETSC_FALSE;
2165   b->colmap      = PETSC_NULL;
2166   b->garray      = PETSC_NULL;
2167   b->roworiented = PETSC_TRUE;
2168 
2169   /* stuff used in block assembly */
2170   b->barray       = 0;
2171 
2172   /* stuff used for matrix vector multiply */
2173   b->lvec         = 0;
2174   b->Mvctx        = 0;
2175 
2176   /* stuff for MatGetRow() */
2177   b->rowindices   = 0;
2178   b->rowvalues    = 0;
2179   b->getrowactive = PETSC_FALSE;
2180 
2181   /* hash table stuff */
2182   b->ht           = 0;
2183   b->hd           = 0;
2184   b->ht_size      = 0;
2185   b->ht_flag      = PETSC_FALSE;
2186   b->ht_fact      = 0;
2187   b->ht_total_ct  = 0;
2188   b->ht_insert_ct = 0;
2189 
2190   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2191     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2192     if (flg) {
2193       PetscReal fact = 1.39;
2194       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2195       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2196       if (fact <= 1.0) fact = 1.39;
2197       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2198       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2199     }
2200   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2201 
2202   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2203                                      "MatStoreValues_MPIBAIJ",
2204                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2205   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2206                                      "MatRetrieveValues_MPIBAIJ",
2207                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2208   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2209                                      "MatGetDiagonalBlock_MPIBAIJ",
2210                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2211   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2212                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2213                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2214   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2215 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2216 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2217   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2218                                      "MatDiagonalScaleLocal_MPIBAIJ",
2219                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2220   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2221                                      "MatSetHashTableFactor_MPIBAIJ",
2222                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2223   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 EXTERN_C_END
2227 
2228 /*MC
2229    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2230 
2231    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2232    and MATMPIBAIJ otherwise.
2233 
2234    Options Database Keys:
2235 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2236 
2237   Level: beginner
2238 
2239 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2240 M*/
2241 
2242 EXTERN_C_BEGIN
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatCreate_BAIJ"
2245 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2246 {
2247   PetscErrorCode ierr;
2248   PetscMPIInt    size;
2249 
2250   PetscFunctionBegin;
2251   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2252   if (size == 1) {
2253     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2254   } else {
2255     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 EXTERN_C_END
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2263 /*@C
2264    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2265    (block compressed row).  For good matrix assembly performance
2266    the user should preallocate the matrix storage by setting the parameters
2267    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2268    performance can be increased by more than a factor of 50.
2269 
2270    Collective on Mat
2271 
2272    Input Parameters:
2273 +  A - the matrix
2274 .  bs   - size of blockk
2275 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2276            submatrix  (same for all local rows)
2277 .  d_nnz - array containing the number of block nonzeros in the various block rows
2278            of the in diagonal portion of the local (possibly different for each block
2279            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2280 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2281            submatrix (same for all local rows).
2282 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2283            off-diagonal portion of the local submatrix (possibly different for
2284            each block row) or PETSC_NULL.
2285 
2286    If the *_nnz parameter is given then the *_nz parameter is ignored
2287 
2288    Options Database Keys:
2289 +   -mat_block_size - size of the blocks to use
2290 -   -mat_use_hash_table <fact>
2291 
2292    Notes:
2293    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2294    than it must be used on all processors that share the object for that argument.
2295 
2296    Storage Information:
2297    For a square global matrix we define each processor's diagonal portion
2298    to be its local rows and the corresponding columns (a square submatrix);
2299    each processor's off-diagonal portion encompasses the remainder of the
2300    local matrix (a rectangular submatrix).
2301 
2302    The user can specify preallocated storage for the diagonal part of
2303    the local submatrix with either d_nz or d_nnz (not both).  Set
2304    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2305    memory allocation.  Likewise, specify preallocated storage for the
2306    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2307 
2308    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2309    the figure below we depict these three local rows and all columns (0-11).
2310 
2311 .vb
2312            0 1 2 3 4 5 6 7 8 9 10 11
2313           -------------------
2314    row 3  |  o o o d d d o o o o o o
2315    row 4  |  o o o d d d o o o o o o
2316    row 5  |  o o o d d d o o o o o o
2317           -------------------
2318 .ve
2319 
2320    Thus, any entries in the d locations are stored in the d (diagonal)
2321    submatrix, and any entries in the o locations are stored in the
2322    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2323    stored simply in the MATSEQBAIJ format for compressed row storage.
2324 
2325    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2326    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2327    In general, for PDE problems in which most nonzeros are near the diagonal,
2328    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2329    or you will get TERRIBLE performance; see the users' manual chapter on
2330    matrices.
2331 
2332    You can call MatGetInfo() to get information on how effective the preallocation was;
2333    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2334    You can also run with the option -info and look for messages with the string
2335    malloc in them to see if additional memory allocation was needed.
2336 
2337    Level: intermediate
2338 
2339 .keywords: matrix, block, aij, compressed row, sparse, parallel
2340 
2341 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2342 @*/
2343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2344 {
2345   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2346 
2347   PetscFunctionBegin;
2348   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2349   if (f) {
2350     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2351   }
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatCreateMPIBAIJ"
2357 /*@C
2358    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2359    (block compressed row).  For good matrix assembly performance
2360    the user should preallocate the matrix storage by setting the parameters
2361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2362    performance can be increased by more than a factor of 50.
2363 
2364    Collective on MPI_Comm
2365 
2366    Input Parameters:
2367 +  comm - MPI communicator
2368 .  bs   - size of blockk
2369 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2370            This value should be the same as the local size used in creating the
2371            y vector for the matrix-vector product y = Ax.
2372 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2373            This value should be the same as the local size used in creating the
2374            x vector for the matrix-vector product y = Ax.
2375 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2376 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2377 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2378            submatrix  (same for all local rows)
2379 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2380            of the in diagonal portion of the local (possibly different for each block
2381            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2382 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2383            submatrix (same for all local rows).
2384 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2385            off-diagonal portion of the local submatrix (possibly different for
2386            each block row) or PETSC_NULL.
2387 
2388    Output Parameter:
2389 .  A - the matrix
2390 
2391    Options Database Keys:
2392 +   -mat_block_size - size of the blocks to use
2393 -   -mat_use_hash_table <fact>
2394 
2395    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2396    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2397    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2398    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2399 
2400    Notes:
2401    If the *_nnz parameter is given then the *_nz parameter is ignored
2402 
2403    A nonzero block is any block that as 1 or more nonzeros in it
2404 
2405    The user MUST specify either the local or global matrix dimensions
2406    (possibly both).
2407 
2408    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2409    than it must be used on all processors that share the object for that argument.
2410 
2411    Storage Information:
2412    For a square global matrix we define each processor's diagonal portion
2413    to be its local rows and the corresponding columns (a square submatrix);
2414    each processor's off-diagonal portion encompasses the remainder of the
2415    local matrix (a rectangular submatrix).
2416 
2417    The user can specify preallocated storage for the diagonal part of
2418    the local submatrix with either d_nz or d_nnz (not both).  Set
2419    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2420    memory allocation.  Likewise, specify preallocated storage for the
2421    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2422 
2423    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2424    the figure below we depict these three local rows and all columns (0-11).
2425 
2426 .vb
2427            0 1 2 3 4 5 6 7 8 9 10 11
2428           -------------------
2429    row 3  |  o o o d d d o o o o o o
2430    row 4  |  o o o d d d o o o o o o
2431    row 5  |  o o o d d d o o o o o o
2432           -------------------
2433 .ve
2434 
2435    Thus, any entries in the d locations are stored in the d (diagonal)
2436    submatrix, and any entries in the o locations are stored in the
2437    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2438    stored simply in the MATSEQBAIJ format for compressed row storage.
2439 
2440    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2441    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2442    In general, for PDE problems in which most nonzeros are near the diagonal,
2443    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2444    or you will get TERRIBLE performance; see the users' manual chapter on
2445    matrices.
2446 
2447    Level: intermediate
2448 
2449 .keywords: matrix, block, aij, compressed row, sparse, parallel
2450 
2451 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2452 @*/
2453 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2454 {
2455   PetscErrorCode ierr;
2456   PetscMPIInt    size;
2457 
2458   PetscFunctionBegin;
2459   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2460   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2461   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2462   if (size > 1) {
2463     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2464     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2465   } else {
2466     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2467     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2474 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2475 {
2476   Mat            mat;
2477   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2478   PetscErrorCode ierr;
2479   PetscInt       len=0;
2480 
2481   PetscFunctionBegin;
2482   *newmat       = 0;
2483   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2484   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2485   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2486   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2487 
2488   mat->factor       = matin->factor;
2489   mat->preallocated = PETSC_TRUE;
2490   mat->assembled    = PETSC_TRUE;
2491   mat->insertmode   = NOT_SET_VALUES;
2492 
2493   a      = (Mat_MPIBAIJ*)mat->data;
2494   mat->rmap.bs  = matin->rmap.bs;
2495   a->bs2   = oldmat->bs2;
2496   a->mbs   = oldmat->mbs;
2497   a->nbs   = oldmat->nbs;
2498   a->Mbs   = oldmat->Mbs;
2499   a->Nbs   = oldmat->Nbs;
2500 
2501   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2502   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2503 
2504   a->size         = oldmat->size;
2505   a->rank         = oldmat->rank;
2506   a->donotstash   = oldmat->donotstash;
2507   a->roworiented  = oldmat->roworiented;
2508   a->rowindices   = 0;
2509   a->rowvalues    = 0;
2510   a->getrowactive = PETSC_FALSE;
2511   a->barray       = 0;
2512   a->rstartbs     = oldmat->rstartbs;
2513   a->rendbs       = oldmat->rendbs;
2514   a->cstartbs     = oldmat->cstartbs;
2515   a->cendbs       = oldmat->cendbs;
2516 
2517   /* hash table stuff */
2518   a->ht           = 0;
2519   a->hd           = 0;
2520   a->ht_size      = 0;
2521   a->ht_flag      = oldmat->ht_flag;
2522   a->ht_fact      = oldmat->ht_fact;
2523   a->ht_total_ct  = 0;
2524   a->ht_insert_ct = 0;
2525 
2526   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2527   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2528   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2529   if (oldmat->colmap) {
2530 #if defined (PETSC_USE_CTABLE)
2531   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2532 #else
2533   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2534   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2535   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2536 #endif
2537   } else a->colmap = 0;
2538 
2539   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2540     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2541     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2542     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2543   } else a->garray = 0;
2544 
2545   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2546   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2547   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2548   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2549 
2550   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2551   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2552   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2553   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2554   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2555   *newmat = mat;
2556 
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #include "petscsys.h"
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatLoad_MPIBAIJ"
2564 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2565 {
2566   Mat            A;
2567   PetscErrorCode ierr;
2568   int            fd;
2569   PetscInt       i,nz,j,rstart,rend;
2570   PetscScalar    *vals,*buf;
2571   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2572   MPI_Status     status;
2573   PetscMPIInt    rank,size,maxnz;
2574   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2575   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2576   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2577   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2578   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2579   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2580 
2581   PetscFunctionBegin;
2582   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2583     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2584   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2585 
2586   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2588   if (!rank) {
2589     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2590     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2591     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2592   }
2593 
2594   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2595   M = header[1]; N = header[2];
2596 
2597   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2598 
2599   /*
2600      This code adds extra rows to make sure the number of rows is
2601      divisible by the blocksize
2602   */
2603   Mbs        = M/bs;
2604   extra_rows = bs - M + bs*Mbs;
2605   if (extra_rows == bs) extra_rows = 0;
2606   else                  Mbs++;
2607   if (extra_rows && !rank) {
2608     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2609   }
2610 
2611   /* determine ownership of all rows */
2612   mbs        = Mbs/size + ((Mbs % size) > rank);
2613   m          = mbs*bs;
2614   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2615   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2616 
2617   /* process 0 needs enough room for process with most rows */
2618   if (!rank) {
2619     mmax = rowners[1];
2620     for (i=2; i<size; i++) {
2621       mmax = PetscMax(mmax,rowners[i]);
2622     }
2623     mmax*=bs;
2624   } else mmax = m;
2625 
2626   rowners[0] = 0;
2627   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2628   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2629   rstart = rowners[rank];
2630   rend   = rowners[rank+1];
2631 
2632   /* distribute row lengths to all processors */
2633   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2634   if (!rank) {
2635     mend = m;
2636     if (size == 1) mend = mend - extra_rows;
2637     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2638     for (j=mend; j<m; j++) locrowlens[j] = 1;
2639     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2640     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2641     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2642     for (j=0; j<m; j++) {
2643       procsnz[0] += locrowlens[j];
2644     }
2645     for (i=1; i<size; i++) {
2646       mend = browners[i+1] - browners[i];
2647       if (i == size-1) mend = mend - extra_rows;
2648       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2649       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2650       /* calculate the number of nonzeros on each processor */
2651       for (j=0; j<browners[i+1]-browners[i]; j++) {
2652         procsnz[i] += rowlengths[j];
2653       }
2654       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2655     }
2656     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2657   } else {
2658     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2659   }
2660 
2661   if (!rank) {
2662     /* determine max buffer needed and allocate it */
2663     maxnz = procsnz[0];
2664     for (i=1; i<size; i++) {
2665       maxnz = PetscMax(maxnz,procsnz[i]);
2666     }
2667     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2668 
2669     /* read in my part of the matrix column indices  */
2670     nz     = procsnz[0];
2671     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2672     mycols = ibuf;
2673     if (size == 1)  nz -= extra_rows;
2674     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2675     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2676 
2677     /* read in every ones (except the last) and ship off */
2678     for (i=1; i<size-1; i++) {
2679       nz   = procsnz[i];
2680       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2681       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2682     }
2683     /* read in the stuff for the last proc */
2684     if (size != 1) {
2685       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2686       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2687       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2688       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2689     }
2690     ierr = PetscFree(cols);CHKERRQ(ierr);
2691   } else {
2692     /* determine buffer space needed for message */
2693     nz = 0;
2694     for (i=0; i<m; i++) {
2695       nz += locrowlens[i];
2696     }
2697     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2698     mycols = ibuf;
2699     /* receive message of column indices*/
2700     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2701     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2702     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2703   }
2704 
2705   /* loop over local rows, determining number of off diagonal entries */
2706   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2707   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2708   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2709   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2710   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2711   rowcount = 0; nzcount = 0;
2712   for (i=0; i<mbs; i++) {
2713     dcount  = 0;
2714     odcount = 0;
2715     for (j=0; j<bs; j++) {
2716       kmax = locrowlens[rowcount];
2717       for (k=0; k<kmax; k++) {
2718         tmp = mycols[nzcount++]/bs;
2719         if (!mask[tmp]) {
2720           mask[tmp] = 1;
2721           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2722           else masked1[dcount++] = tmp;
2723         }
2724       }
2725       rowcount++;
2726     }
2727 
2728     dlens[i]  = dcount;
2729     odlens[i] = odcount;
2730 
2731     /* zero out the mask elements we set */
2732     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2733     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2734   }
2735 
2736   /* create our matrix */
2737   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2738   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2739   ierr = MatSetType(A,type);CHKERRQ(ierr)
2740   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2741 
2742   if (!rank) {
2743     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2744     /* read in my part of the matrix numerical values  */
2745     nz = procsnz[0];
2746     vals = buf;
2747     mycols = ibuf;
2748     if (size == 1)  nz -= extra_rows;
2749     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2750     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2751 
2752     /* insert into matrix */
2753     jj      = rstart*bs;
2754     for (i=0; i<m; i++) {
2755       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2756       mycols += locrowlens[i];
2757       vals   += locrowlens[i];
2758       jj++;
2759     }
2760     /* read in other processors (except the last one) and ship out */
2761     for (i=1; i<size-1; i++) {
2762       nz   = procsnz[i];
2763       vals = buf;
2764       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2765       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2766     }
2767     /* the last proc */
2768     if (size != 1){
2769       nz   = procsnz[i] - extra_rows;
2770       vals = buf;
2771       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2772       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2773       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2774     }
2775     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2776   } else {
2777     /* receive numeric values */
2778     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2779 
2780     /* receive message of values*/
2781     vals   = buf;
2782     mycols = ibuf;
2783     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2784     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2785     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2786 
2787     /* insert into matrix */
2788     jj      = rstart*bs;
2789     for (i=0; i<m; i++) {
2790       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2791       mycols += locrowlens[i];
2792       vals   += locrowlens[i];
2793       jj++;
2794     }
2795   }
2796   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2797   ierr = PetscFree(buf);CHKERRQ(ierr);
2798   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2799   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2800   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2801   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2802   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2803   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804 
2805   *newmat = A;
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 #undef __FUNCT__
2810 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2811 /*@
2812    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2813 
2814    Input Parameters:
2815 .  mat  - the matrix
2816 .  fact - factor
2817 
2818    Collective on Mat
2819 
2820    Level: advanced
2821 
2822   Notes:
2823    This can also be set by the command line option: -mat_use_hash_table <fact>
2824 
2825 .keywords: matrix, hashtable, factor, HT
2826 
2827 .seealso: MatSetOption()
2828 @*/
2829 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2830 {
2831   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2832 
2833   PetscFunctionBegin;
2834   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2835   if (f) {
2836     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2837   }
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 EXTERN_C_BEGIN
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2844 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2845 {
2846   Mat_MPIBAIJ *baij;
2847 
2848   PetscFunctionBegin;
2849   baij = (Mat_MPIBAIJ*)mat->data;
2850   baij->ht_fact = fact;
2851   PetscFunctionReturn(0);
2852 }
2853 EXTERN_C_END
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2857 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2858 {
2859   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2860   PetscFunctionBegin;
2861   *Ad     = a->A;
2862   *Ao     = a->B;
2863   *colmap = a->garray;
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 /*
2868     Special version for direct calls from Fortran (to eliminate two function call overheads
2869 */
2870 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2871 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2872 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2873 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2874 #endif
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "matmpibiajsetvaluesblocked"
2878 /*@C
2879   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
2880 
2881   Collective on Mat
2882 
2883   Input Parameters:
2884 + mat - the matrix
2885 . min - number of input rows
2886 . im - input rows
2887 . nin - number of input columns
2888 . in - input columns
2889 . v - numerical values input
2890 - addvin - INSERT_VALUES or ADD_VALUES
2891 
2892   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
2893 
2894   Level: advanced
2895 
2896 .seealso:   MatSetValuesBlocked()
2897 @*/
2898 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
2899 {
2900   /* convert input arguments to C version */
2901   Mat             mat = *matin;
2902   PetscInt        m = *min, n = *nin;
2903   InsertMode      addv = *addvin;
2904 
2905   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
2906   const MatScalar *value;
2907   MatScalar       *barray=baij->barray;
2908   PetscTruth      roworiented = baij->roworiented;
2909   PetscErrorCode  ierr;
2910   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
2911   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
2912   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
2913 
2914   PetscFunctionBegin;
2915   /* tasks normally handled by MatSetValuesBlocked() */
2916   if (mat->insertmode == NOT_SET_VALUES) {
2917     mat->insertmode = addv;
2918   }
2919 #if defined(PETSC_USE_DEBUG)
2920   else if (mat->insertmode != addv) {
2921     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
2922   }
2923   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2924 #endif
2925   if (mat->assembled) {
2926     mat->was_assembled = PETSC_TRUE;
2927     mat->assembled     = PETSC_FALSE;
2928   }
2929   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
2930 
2931 
2932   if(!barray) {
2933     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
2934     baij->barray = barray;
2935   }
2936 
2937   if (roworiented) {
2938     stepval = (n-1)*bs;
2939   } else {
2940     stepval = (m-1)*bs;
2941   }
2942   for (i=0; i<m; i++) {
2943     if (im[i] < 0) continue;
2944 #if defined(PETSC_USE_DEBUG)
2945     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
2946 #endif
2947     if (im[i] >= rstart && im[i] < rend) {
2948       row = im[i] - rstart;
2949       for (j=0; j<n; j++) {
2950         /* If NumCol = 1 then a copy is not required */
2951         if ((roworiented) && (n == 1)) {
2952           barray = (MatScalar*)v + i*bs2;
2953         } else if((!roworiented) && (m == 1)) {
2954           barray = (MatScalar*)v + j*bs2;
2955         } else { /* Here a copy is required */
2956           if (roworiented) {
2957             value = v + i*(stepval+bs)*bs + j*bs;
2958           } else {
2959             value = v + j*(stepval+bs)*bs + i*bs;
2960           }
2961           for (ii=0; ii<bs; ii++,value+=stepval) {
2962             for (jj=0; jj<bs; jj++) {
2963               *barray++  = *value++;
2964             }
2965           }
2966           barray -=bs2;
2967         }
2968 
2969         if (in[j] >= cstart && in[j] < cend){
2970           col  = in[j] - cstart;
2971           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
2972         }
2973         else if (in[j] < 0) continue;
2974 #if defined(PETSC_USE_DEBUG)
2975         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
2976 #endif
2977         else {
2978           if (mat->was_assembled) {
2979             if (!baij->colmap) {
2980               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2981             }
2982 
2983 #if defined(PETSC_USE_DEBUG)
2984 #if defined (PETSC_USE_CTABLE)
2985             { PetscInt data;
2986               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
2987               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2988             }
2989 #else
2990             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2991 #endif
2992 #endif
2993 #if defined (PETSC_USE_CTABLE)
2994 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
2995             col  = (col - 1)/bs;
2996 #else
2997             col = (baij->colmap[in[j]] - 1)/bs;
2998 #endif
2999             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3000               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3001               col =  in[j];
3002             }
3003           }
3004           else col = in[j];
3005           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3006         }
3007       }
3008     } else {
3009       if (!baij->donotstash) {
3010         if (roworiented) {
3011           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3012         } else {
3013           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3014         }
3015       }
3016     }
3017   }
3018 
3019   /* task normally handled by MatSetValuesBlocked() */
3020   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3021   PetscFunctionReturn(0);
3022 }
3023