#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
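
/*
   Illustrative sketch (compiled out; not part of the library): how the colmap
   built above is typically consulted.  For a global point column gcol the
   block column is gcol/bs; colmap stores "local block offset + 1" so that a
   stored value of 0 can mean "block column not present in the off-diagonal
   part".  The function name and parameters here are hypothetical.
*/
#if 0
static PetscErrorCode ColmapLookupSketch(Mat_MPIBAIJ *baij,PetscInt bs,PetscInt gcol,PetscInt *lcol)
{
  PetscErrorCode ierr;
  PetscInt       col;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
  col  = col - 1;                  /* stored value is offset+1; 0 means absent */
#else
  col = baij->colmap[gcol/bs] - 1; /* -1 when the block column is absent */
#endif
  if (col >= 0) col += gcol % bs;  /* add the point offset within the block */
  *lcol = col;                     /* negative: not in the off-diagonal part */
  PetscFunctionReturn(0);
}
#endif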

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
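
/*
   Illustrative sketch (compiled out): the two macros above first locate the
   target block column with a truncated binary search followed by a short
   linear scan, then address the entry inside the bs x bs block, which is
   stored in column-major order: offset = bs2*_i + bs*cidx + ridx, where _i
   is the position of the block within the block row, cidx = col%bs and
   ridx = row%bs.  A worked example with hypothetical numbers:
*/
#if 0
static PetscInt IntraBlockOffsetSketch(void)
{
  PetscInt bs   = 3,bs2 = bs*bs;  /* 3x3 blocks                              */
  PetscInt row  = 7,col = 5;      /* global point indices                    */
  PetscInt ridx = row % bs;       /* = 1, row within the block               */
  PetscInt cidx = col % bs;       /* = 2, column within the block            */
  PetscInt _i   = 4;              /* say the block is the 5th one in its row */
  return bs2*_i + bs*cidx + ridx; /* = 9*4 + 3*2 + 1 = 43                    */
}
#endif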

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
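
/*
   Illustrative sketch (compiled out): HASH() is multiplicative hashing with
   the golden-ratio constant; key*HASH_KEY is reduced to its fractional part
   and scaled by the table size, yielding an index in [0,size).  The numbers
   below are hypothetical:
*/
#if 0
static PetscInt HashSketch(void)
{
  PetscReal tmp;
  PetscInt  size = 97,key = 12345;
  /* 12345*0.6180339887 = 7629.6296...; frac = 0.6296...; 0.6296*97 -> 61 */
  return HASH(size,key,tmp);
}
#endif
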
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to
  the matrix, the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
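
/*
   Illustrative sketch (compiled out): lookups mirror the insertion loop
   above.  The key row*Nbs+col+1 is hashed to h1, and the table is probed
   linearly with wraparound until the key is found; HD[] then points at the
   first of the bs2 values of that block.  The function name is hypothetical.
*/
#if 0
static MatScalar *HashLookupSketch(Mat_MPIBAIJ *baij,PetscInt brow,PetscInt bcol)
{
  PetscReal tmp;
  PetscInt  size = baij->ht_size,*HT = baij->ht,idx;
  PetscInt  key  = brow*baij->Nbs + bcol + 1;
  PetscInt  h1   = HASH(size,key,tmp);

  for (idx=h1; idx<size; idx++) if (HT[idx] == key) return baij->hd[idx];
  for (idx=0; idx<h1; idx++)    if (HT[idx] == key) return baij->hd[idx];
  return 0; /* block (brow,bcol) was not preallocated */
}
#endif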

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average hash table search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
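
/*
   Illustrative sketch (compiled out): the standard calling sequence that
   drives the two assembly routines above.  Off-process entries are stashed
   by MatSetValues() and migrated to their owners between MatAssemblyBegin()
   and MatAssemblyEnd().  Sizes and values here are hypothetical.
*/
#if 0
static PetscErrorCode AssemblySketch(MPI_Comm comm)
{
  Mat            mat;
  PetscErrorCode ierr;
  PetscInt       row = 0,col = 0;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(mat,2,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr); /* may be off-process */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif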

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
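
/*
   Illustrative sketch (compiled out): typical use of the routine above; the
   vector must share the matrix row layout, and the call fails for
   non-square matrices.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode GetDiagonalSketch(Mat A)
{
  Vec            d;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr); /* left vector: row layout */
  ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
  ierr = VecDestroy(d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif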

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1348 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1349 {
1350   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1355   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1361 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1362 {
1363   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1364   Mat            A = a->A,B = a->B;
1365   PetscErrorCode ierr;
1366   PetscReal      isend[5],irecv[5];
1367 
1368   PetscFunctionBegin;
1369   info->block_size     = (PetscReal)matin->rmap->bs;
1370   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1371   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1372   isend[3] = info->memory;  isend[4] = info->mallocs;
1373   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1374   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1375   isend[3] += info->memory;  isend[4] += info->mallocs;
1376   if (flag == MAT_LOCAL) {
1377     info->nz_used      = isend[0];
1378     info->nz_allocated = isend[1];
1379     info->nz_unneeded  = isend[2];
1380     info->memory       = isend[3];
1381     info->mallocs      = isend[4];
1382   } else if (flag == MAT_GLOBAL_MAX) {
1383     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1384     info->nz_used      = irecv[0];
1385     info->nz_allocated = irecv[1];
1386     info->nz_unneeded  = irecv[2];
1387     info->memory       = irecv[3];
1388     info->mallocs      = irecv[4];
1389   } else if (flag == MAT_GLOBAL_SUM) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else {
1397     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1398   }
1399   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1400   info->fill_ratio_needed = 0;
1401   info->factor_mallocs    = 0;
1402   PetscFunctionReturn(0);
1403 }
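
/*
   Usage sketch (illustrative, not part of the original source): querying nonzero
   statistics of an assembled parallel matrix A (a hypothetical name). MAT_LOCAL
   reports this process only; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce over ranks.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",
                         info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/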
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1407 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1408 {
1409   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   switch (op) {
1414   case MAT_NEW_NONZERO_LOCATIONS:
1415   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1416   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1417   case MAT_KEEP_NONZERO_PATTERN:
1418   case MAT_NEW_NONZERO_LOCATION_ERR:
1419     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1420     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1421     break;
1422   case MAT_ROW_ORIENTED:
1423     a->roworiented = flg;
1424     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1425     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1426     break;
1427   case MAT_NEW_DIAGONALS:
1428     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1429     break;
1430   case MAT_IGNORE_OFF_PROC_ENTRIES:
1431     a->donotstash = flg;
1432     break;
1433   case MAT_USE_HASH_TABLE:
1434     a->ht_flag = flg;
1435     break;
1436   case MAT_SYMMETRIC:
1437   case MAT_STRUCTURALLY_SYMMETRIC:
1438   case MAT_HERMITIAN:
1439   case MAT_SYMMETRY_ETERNAL:
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     break;
1442   default:
1443     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1444   }
1445   PetscFunctionReturn(0);
1446 }
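
/*
   Usage sketch (illustrative, not part of the original source): options are set on
   the parallel matrix A and forwarded to the sequential blocks as appropriate; e.g.
   to skip communicating entries destined for other processes during assembly:

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/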
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1450 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1451 {
1452   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1453   Mat_SeqBAIJ    *Aloc;
1454   Mat            B;
1455   PetscErrorCode ierr;
1456   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1457   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1458   MatScalar      *a;
1459 
1460   PetscFunctionBegin;
1461   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1462   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1463     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1464     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1465     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1466     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1467   } else {
1468     B = *matout;
1469   }
1470 
1471   /* copy over the A part */
1472   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1473   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1474   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1475 
1476   for (i=0; i<mbs; i++) {
1477     rvals[0] = bs*(baij->rstartbs + i);
1478     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1479     for (j=ai[i]; j<ai[i+1]; j++) {
1480       col = (baij->cstartbs+aj[j])*bs;
1481       for (k=0; k<bs; k++) {
1482         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1483         col++; a += bs;
1484       }
1485     }
1486   }
1487   /* copy over the B part */
1488   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1489   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1490   for (i=0; i<mbs; i++) {
1491     rvals[0] = bs*(baij->rstartbs + i);
1492     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1493     for (j=ai[i]; j<ai[i+1]; j++) {
1494       col = baij->garray[aj[j]]*bs;
1495       for (k=0; k<bs; k++) {
1496         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1497         col++; a += bs;
1498       }
1499     }
1500   }
1501   ierr = PetscFree(rvals);CHKERRQ(ierr);
1502   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504 
1505   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1506     *matout = B;
1507   } else {
1508     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
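
/*
   Usage sketch (illustrative, not part of the original source): out-of-place and
   in-place transposes of a hypothetical matrix A; the in-place form requires a
   square matrix, as enforced above.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix */
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place   */
*/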
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1515 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1516 {
1517   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1518   Mat            a = baij->A,b = baij->B;
1519   PetscErrorCode ierr;
1520   PetscInt       s1,s2,s3;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1524   if (rr) {
1525     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1526     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1527     /* Overlap communication with computation. */
1528     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529   }
1530   if (ll) {
1531     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1532     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1533     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1534   }
1535   /* scale the diagonal block */
1536   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1537 
1538   if (rr) {
1539     /* Do a scatter end and then right scale the off-diagonal block */
1540     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1542   }
1543 
1544   PetscFunctionReturn(0);
1545 }
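
/*
   Usage sketch (illustrative, not part of the original source): computing
   A <- diag(l)*A*diag(r), where l conforms to the local row layout and r to the
   local column layout; either vector (hypothetical names) may be PETSC_NULL.

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/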
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1549 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1550 {
1551   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1552   PetscErrorCode ierr;
1553   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1554   PetscInt       i,*owners = A->rmap->range;
1555   PetscInt       *nprocs,j,idx,nsends,row;
1556   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1557   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1558   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1559   MPI_Comm       comm = ((PetscObject)A)->comm;
1560   MPI_Request    *send_waits,*recv_waits;
1561   MPI_Status     recv_status,*send_status;
1562 #if defined(PETSC_DEBUG)
1563   PetscTruth     found = PETSC_FALSE;
1564 #endif
1565 
1566   PetscFunctionBegin;
1567   /*  first count number of contributors to each processor */
1568   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1569   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1570   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1571   j = 0;
1572   for (i=0; i<N; i++) {
1573     if (lastidx > (idx = rows[i])) j = 0;
1574     lastidx = idx;
1575     for (; j<size; j++) {
1576       if (idx >= owners[j] && idx < owners[j+1]) {
1577         nprocs[2*j]++;
1578         nprocs[2*j+1] = 1;
1579         owner[i] = j;
1580 #if defined(PETSC_DEBUG)
1581         found = PETSC_TRUE;
1582 #endif
1583         break;
1584       }
1585     }
1586 #if defined(PETSC_DEBUG)
1587     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1588     found = PETSC_FALSE;
1589 #endif
1590   }
1591   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1592 
1593   /* inform other processors of number of messages and max length*/
1594   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1595 
1596   /* post receives:   */
1597   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1598   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1599   for (i=0; i<nrecvs; i++) {
1600     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1601   }
1602 
1603   /* do sends:
1604      1) starts[i] gives the starting index in svalues for the entries going to
1605      the ith processor
1606   */
1607   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1608   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1609   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1610   starts[0]  = 0;
1611   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1612   for (i=0; i<N; i++) {
1613     svalues[starts[owner[i]]++] = rows[i];
1614   }
1615 
1616   starts[0] = 0;
1617   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1618   count = 0;
1619   for (i=0; i<size; i++) {
1620     if (nprocs[2*i+1]) {
1621       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1622     }
1623   }
1624   ierr = PetscFree(starts);CHKERRQ(ierr);
1625 
1626   base = owners[rank];
1627 
1628   /*  wait on receives */
1629   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1630   count  = nrecvs;
1631   slen = 0;
1632   while (count) {
1633     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1634     /* unpack receives into our local space */
1635     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1636     source[imdex]  = recv_status.MPI_SOURCE;
1637     lens[imdex]    = n;
1638     slen          += n;
1639     count--;
1640   }
1641   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1642 
1643   /* move the data into the send scatter */
1644   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1645   count = 0;
1646   for (i=0; i<nrecvs; i++) {
1647     values = rvalues + i*nmax;
1648     for (j=0; j<lens[i]; j++) {
1649       lrows[count++] = values[j] - base;
1650     }
1651   }
1652   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1653   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1654   ierr = PetscFree(owner);CHKERRQ(ierr);
1655   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1656 
1657   /* actually zap the local rows */
1658   /*
1659         Zero the required rows. If the "diagonal block" of the matrix
1660      is square and the user wishes to set the diagonal we use separate
1661      code so that MatSetValues() is not called for each diagonal allocating
1662      new memory, thus calling lots of mallocs and slowing things down.
1663 
1664   */
1665   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1666   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1667   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1669   } else if (diag != 0.0) {
1670     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1671     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1672       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1673 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1674     }
1675     for (i=0; i<slen; i++) {
1676       row  = lrows[i] + rstart_bs;
1677       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1678     }
1679     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   } else {
1682     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1683   }
1684 
1685   ierr = PetscFree(lrows);CHKERRQ(ierr);
1686 
1687   /* wait on sends */
1688   if (nsends) {
1689     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1690     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1691     ierr = PetscFree(send_status);CHKERRQ(ierr);
1692   }
1693   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1694   ierr = PetscFree(svalues);CHKERRQ(ierr);
1695 
1696   PetscFunctionReturn(0);
1697 }
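
/*
   Usage sketch (illustrative, not part of the original source): zeroing rows given
   by global (unblocked) row numbers, e.g. for Dirichlet boundary conditions, with
   1.0 placed on the diagonal of each zeroed row. Any process may list any global
   row; the communication above routes each row to its owner. The names A, nrows
   and rows are assumptions for the example.

      ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);
*/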
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1701 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1702 {
1703   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatEqual_MPIBAIJ"
1715 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1716 {
1717   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1718   Mat            a,b,c,d;
1719   PetscTruth     flg;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   a = matA->A; b = matA->B;
1724   c = matB->A; d = matB->B;
1725 
1726   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1727   if (flg) {
1728     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1729   }
1730   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCopy_MPIBAIJ"
1736 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1737 {
1738   PetscErrorCode ierr;
1739   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1740   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1741 
1742   PetscFunctionBegin;
1743   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1744   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1745     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1746   } else {
1747     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1748     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1755 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1756 {
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #include "petscblaslapack.h"
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1767 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1768 {
1769   PetscErrorCode ierr;
1770   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1771   PetscBLASInt   bnz,one=1;
1772   Mat_SeqBAIJ    *x,*y;
1773 
1774   PetscFunctionBegin;
1775   if (str == SAME_NONZERO_PATTERN) {
1776     PetscScalar alpha = a;
1777     x = (Mat_SeqBAIJ *)xx->A->data;
1778     y = (Mat_SeqBAIJ *)yy->A->data;
1779     bnz = PetscBLASIntCast(x->nz);
1780     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1781     x = (Mat_SeqBAIJ *)xx->B->data;
1782     y = (Mat_SeqBAIJ *)yy->B->data;
1783     bnz = PetscBLASIntCast(x->nz);
1784     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1785   } else {
1786     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1787   }
1788   PetscFunctionReturn(0);
1789 }
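
/*
   Usage sketch (illustrative, not part of the original source): Y <- a*X + Y for
   hypothetical matrices X and Y. SAME_NONZERO_PATTERN selects the fast BLAS path
   above and is only safe when both matrices really share a block nonzero pattern;
   otherwise pass DIFFERENT_NONZERO_PATTERN to fall back to MatAXPY_Basic().

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/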
1790 
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1793 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1794 {
1795   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1796   PetscInt rbs,cbs;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1801   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1802   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1803   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1804   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1805   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1811 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1812 {
1813   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1818   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1824 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1825 {
1826   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1831   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1837 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1838 {
1839   PetscErrorCode ierr;
1840   IS             iscol_local;
1841   PetscInt       csize;
1842 
1843   PetscFunctionBegin;
1844   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1845   if (call == MAT_REUSE_MATRIX) {
1846     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1847     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1848   } else {
1849     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1850   }
1851   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1852   if (call == MAT_INITIAL_MATRIX) {
1853     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1854     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
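
/*
   Usage sketch (illustrative, not part of the original source; isrow, iscol and S
   are hypothetical names, and the signature matches this revision): extracting a
   parallel submatrix. isrow lists the rows this process will own in the result;
   iscol is gathered onto all processes above, so it need not be identical everywhere.

      Mat S;
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/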
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1861 /*
1862     Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
1863   then the end result assembled by concatenating the local matrices.
1864   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1865 */
1866 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1867 {
1868   PetscErrorCode ierr;
1869   PetscMPIInt    rank,size;
1870   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1871   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1872   Mat            *local,M,Mreuse;
1873   MatScalar      *vwork,*aa;
1874   MPI_Comm       comm = ((PetscObject)mat)->comm;
1875   Mat_SeqBAIJ    *aij;
1876 
1877 
1878   PetscFunctionBegin;
1879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1881 
1882   if (call ==  MAT_REUSE_MATRIX) {
1883     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1884     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1885     local = &Mreuse;
1886     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1887   } else {
1888     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1889     Mreuse = *local;
1890     ierr   = PetscFree(local);CHKERRQ(ierr);
1891   }
1892 
1893   /*
1894       m - number of local rows
1895       n - number of columns (same on all processors)
1896       rstart - first row in new global matrix generated
1897   */
1898   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1899   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1900   m    = m/bs;
1901   n    = n/bs;
1902 
1903   if (call == MAT_INITIAL_MATRIX) {
1904     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1905     ii  = aij->i;
1906     jj  = aij->j;
1907 
1908     /*
1909         Determine the number of non-zeros in the diagonal and off-diagonal
1910         portions of the matrix in order to do correct preallocation
1911     */
1912 
1913     /* first get start and end of "diagonal" columns */
1914     if (csize == PETSC_DECIDE) {
1915       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1916       if (mglobal == n*bs) { /* square matrix */
1917         nlocal = m;
1918       } else {
1919         nlocal = n/size + ((n % size) > rank);
1920       }
1921     } else {
1922       nlocal = csize/bs;
1923     }
1924     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1925     rstart = rend - nlocal;
1926     if (rank == size - 1 && rend != n) {
1927       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1928     }
1929 
1930     /* next, compute all the lengths */
1931     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1932     olens = dlens + m;
1933     for (i=0; i<m; i++) {
1934       jend = ii[i+1] - ii[i];
1935       olen = 0;
1936       dlen = 0;
1937       for (j=0; j<jend; j++) {
1938         if (*jj < rstart || *jj >= rend) olen++;
1939         else dlen++;
1940         jj++;
1941       }
1942       olens[i] = olen;
1943       dlens[i] = dlen;
1944     }
1945     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1946     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1947     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1949     ierr = PetscFree(dlens);CHKERRQ(ierr);
1950   } else {
1951     PetscInt ml,nl;
1952 
1953     M = *newmat;
1954     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1955     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1956     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1957     /*
1958          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1959        rather than the slower MatSetValues().
1960     */
1961     M->was_assembled = PETSC_TRUE;
1962     M->assembled     = PETSC_FALSE;
1963   }
1964   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1965   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1966   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1967   ii  = aij->i;
1968   jj  = aij->j;
1969   aa  = aij->a;
1970   for (i=0; i<m; i++) {
1971     row   = rstart/bs + i;
1972     nz    = ii[i+1] - ii[i];
1973     cwork = jj;     jj += nz;
1974     vwork = aa;     aa += nz;
1975     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1976   }
1977 
1978   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1980   *newmat = M;
1981 
1982   /* save submatrix used in processor for next request */
1983   if (call ==  MAT_INITIAL_MATRIX) {
1984     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1985     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1986   }
1987 
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatPermute_MPIBAIJ"
1993 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1994 {
1995   MPI_Comm       comm,pcomm;
1996   PetscInt       first,local_size,nrows;
1997   const PetscInt *rows;
1998   PetscMPIInt    size;
1999   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2004   /* make a collective version of 'rowp' */
2005   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2006   if (pcomm==comm) {
2007     crowp = rowp;
2008   } else {
2009     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2010     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2011     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2012     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2013   }
2014   /* collect the global row permutation and invert it */
2015   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2016   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2017   if (pcomm!=comm) {
2018     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2019   }
2020   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2021   /* get the local target indices */
2022   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2023   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2024   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2025   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2026   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2027   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2028   /* the column permutation is so much easier;
2029      make a local version of 'colp' and invert it */
2030   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2031   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2032   if (size==1) {
2033     lcolp = colp;
2034   } else {
2035     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2036     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2037     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2038   }
2039   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2040   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2041   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2042   if (size>1) {
2043     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2044     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2045   }
2046   /* now we just get the submatrix */
2047   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2048   /* clean up */
2049   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2050   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2056 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2057 {
2058   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2060 
2061   PetscFunctionBegin;
2062   if (nghosts) { *nghosts = B->nbs;}
2063   if (ghosts) {*ghosts = baij->garray;}
2064   PetscFunctionReturn(0);
2065 }
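
/*
   Usage sketch (illustrative, not part of the original source): the returned array
   is owned by the matrix and holds the global block indices of the off-process
   columns this process references; do not free it.

      PetscInt       nghosts;
      const PetscInt *ghosts;
      ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/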
2066 
2067 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2071 /*
2072     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2073 */
2074 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2075 {
2076   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2077   PetscErrorCode        ierr;
2078   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2079   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2080   const PetscInt        *is;
2081   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2082   PetscInt              *rowhit,M,cstart,cend,colb;
2083   PetscInt              *columnsforrow,l;
2084   IS                    *isa;
2085   PetscTruth             done,flg;
2086   ISLocalToGlobalMapping map = mat->bmapping;
2087   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2088 
2089   PetscFunctionBegin;
2090   if (!mat->assembled) {
2091     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2092   }
2093   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2094 
2095   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2096 
2097   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2098   M                = mat->rmap->n/bs;
2099   cstart           = mat->cmap->rstart/bs;
2100   cend             = mat->cmap->rend/bs;
2101   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2102   c->N             = mat->cmap->N/bs;
2103   c->m             = mat->rmap->n/bs;
2104   c->rstart        = mat->rmap->rstart/bs;
2105 
2106   c->ncolors       = nis;
2107   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2108   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2109   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2111   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2112   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2113 
2114   /* Allow access to data structures of local part of matrix */
2115   if (!baij->colmap) {
2116     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2117   }
2118   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2119   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2120 
2121   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2122   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2123 
2124   for (i=0; i<nis; i++) {
2125     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2126     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2127     c->ncolumns[i] = n;
2128     if (n) {
2129       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2130       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2131       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2132     } else {
2133       c->columns[i]  = 0;
2134     }
2135 
2136     if (ctype == IS_COLORING_GLOBAL){
2137       /* Determine the total (parallel) number of columns of this color */
2138       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2139       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2140 
2141       nn   = PetscMPIIntCast(n);
2142       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2143       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2144       if (!nctot) {
2145         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2146       }
2147 
2148       disp[0] = 0;
2149       for (j=1; j<size; j++) {
2150         disp[j] = disp[j-1] + ncolsonproc[j-1];
2151       }
2152 
2153       /* Get complete list of columns for color on each processor */
2154       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2155       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2156       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2157     } else if (ctype == IS_COLORING_GHOSTED){
2158       /* Determine local number of columns of this color on this process, including ghost points */
2159       nctot = n;
2160       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2161       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2162     } else {
2163       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2164     }
2165 
2166     /*
2167        Mark all rows affected by these columns
2168     */
2169     /* Temporary option to allow for debugging/testing */
2170     flg  = PETSC_FALSE;
2171     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2172     if (!flg) {/*-----------------------------------------------------------------------------*/
2173       /* crude, fast version */
2174       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2175       /* loop over columns*/
2176       for (j=0; j<nctot; j++) {
2177         if (ctype == IS_COLORING_GHOSTED) {
2178           col = ltog[cols[j]];
2179         } else {
2180           col  = cols[j];
2181         }
2182         if (col >= cstart && col < cend) {
2183           /* column is in diagonal block of matrix */
2184           rows = A_cj + A_ci[col-cstart];
2185           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2186         } else {
2187 #if defined (PETSC_USE_CTABLE)
2188           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2189           colb--;
2190 #else
2191           colb = baij->colmap[col] - 1;
2192 #endif
2193           if (colb == -1) {
2194             m = 0;
2195           } else {
2196             colb = colb/bs;
2197             rows = B_cj + B_ci[colb];
2198             m    = B_ci[colb+1] - B_ci[colb];
2199           }
2200         }
2201         /* loop over columns marking them in rowhit */
2202         for (k=0; k<m; k++) {
2203           rowhit[*rows++] = col + 1;
2204         }
2205       }
2206 
2207       /* count the number of hits */
2208       nrows = 0;
2209       for (j=0; j<M; j++) {
2210         if (rowhit[j]) nrows++;
2211       }
2212       c->nrows[i]         = nrows;
2213       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2214       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2215       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2216       nrows = 0;
2217       for (j=0; j<M; j++) {
2218         if (rowhit[j]) {
2219           c->rows[i][nrows]           = j;
2220           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2221           nrows++;
2222         }
2223       }
2224     } else {/*-------------------------------------------------------------------------------*/
2225       /* slow version, using rowhit as a linked list */
2226       PetscInt currentcol,fm,mfm;
2227       rowhit[M] = M;
2228       nrows     = 0;
2229       /* loop over columns*/
2230       for (j=0; j<nctot; j++) {
2231         if (ctype == IS_COLORING_GHOSTED) {
2232           col = ltog[cols[j]];
2233         } else {
2234           col  = cols[j];
2235         }
2236         if (col >= cstart && col < cend) {
2237           /* column is in diagonal block of matrix */
2238           rows = A_cj + A_ci[col-cstart];
2239           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2240         } else {
2241 #if defined (PETSC_USE_CTABLE)
2242           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2243           colb--;
2244 #else
2245           colb = baij->colmap[col] - 1;
2246 #endif
2247           if (colb == -1) {
2248             m = 0;
2249           } else {
2250             colb = colb/bs;
2251             rows = B_cj + B_ci[colb];
2252             m    = B_ci[colb+1] - B_ci[colb];
2253           }
2254         }
2255 
2256         /* loop over columns marking them in rowhit */
2257         fm    = M; /* fm points to first entry in linked list */
2258         for (k=0; k<m; k++) {
2259           currentcol = *rows++;
2260           /* is it already in the list? */
2261           do {
2262             mfm  = fm;
2263             fm   = rowhit[fm];
2264           } while (fm < currentcol);
2265           /* not in list so add it */
2266           if (fm != currentcol) {
2267             nrows++;
2268             columnsforrow[currentcol] = col;
2269             /* next three lines insert new entry into linked list */
2270             rowhit[mfm]               = currentcol;
2271             rowhit[currentcol]        = fm;
2272             fm                        = currentcol;
2273             /* fm points to present position in list since we know the columns are sorted */
2274           } else {
2275             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2276           }
2277         }
2278       }
2279       c->nrows[i]         = nrows;
2280       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2281       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2282       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2283       /* now store the linked list of rows into c->rows[i] */
2284       nrows = 0;
2285       fm    = rowhit[M];
2286       do {
2287         c->rows[i][nrows]            = fm;
2288         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2289         fm                           = rowhit[fm];
2290       } while (fm < M);
2291     } /* ---------------------------------------------------------------------------------------*/
2292     ierr = PetscFree(cols);CHKERRQ(ierr);
2293   }
2294 
2295   /* Optimize by adding the vscale and scaleforrow[][] fields */
2296   /*
2297        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2298   */
2299   if (ctype == IS_COLORING_GLOBAL) {
2300     PetscInt *garray;
2301     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2302     for (i=0; i<baij->B->cmap->n/bs; i++) {
2303       for (j=0; j<bs; j++) {
2304         garray[i*bs+j] = bs*baij->garray[i]+j;
2305       }
2306     }
2307     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2308     ierr = PetscFree(garray);CHKERRQ(ierr);
2309     CHKMEMQ;
2310     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2311     for (k=0; k<c->ncolors; k++) {
2312       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2313       for (l=0; l<c->nrows[k]; l++) {
2314         col = c->columnsforrow[k][l];
2315         if (col >= cstart && col < cend) {
2316           /* column is in diagonal block of matrix */
2317           colb = col - cstart;
2318         } else {
2319           /* column  is in "off-processor" part */
2320 #if defined (PETSC_USE_CTABLE)
2321           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2322           colb--;
2323 #else
2324           colb = baij->colmap[col] - 1;
2325 #endif
2326           colb = colb/bs;
2327           colb += cend - cstart;
2328         }
2329         c->vscaleforrow[k][l] = colb;
2330       }
2331     }
2332   } else if (ctype == IS_COLORING_GHOSTED) {
2333     /* Get gtol mapping */
2334     PetscInt N = mat->cmap->N, *gtol;
2335     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2336     for (i=0; i<N; i++) gtol[i] = -1;
2337     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2338 
2339     c->vscale = 0; /* will be created in MatFDColoringApply() */
2340     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2341     for (k=0; k<c->ncolors; k++) {
2342       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2343       for (l=0; l<c->nrows[k]; l++) {
2344         col = c->columnsforrow[k][l];      /* global column index */
2345         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2346       }
2347     }
2348     ierr = PetscFree(gtol);CHKERRQ(ierr);
2349   }
2350   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2351 
2352   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2353   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2354   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2355   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2356   CHKMEMQ;
2357   PetscFunctionReturn(0);
2358 }
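
/*
   Usage sketch (illustrative, not part of the original source): building a
   finite-difference coloring from a matrix coloring of a hypothetical Jacobian J;
   typically MatFDColoringSetFunction() would follow.

      ISColoring    iscoloring;
      MatFDColoring fdcoloring;
      ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
      ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/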
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2362 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2363 {
2364   Mat            B;
2365   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2366   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2367   Mat_SeqAIJ     *b;
2368   PetscErrorCode ierr;
2369   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2370   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2371   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2372 
2373   PetscFunctionBegin;
2374   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2375   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2376 
2377   /* ----------------------------------------------------------------
2378      Tell every processor the number of nonzeros per row
2379   */
2380   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2381   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2382     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2383   }
2384   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2385   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2386   displs     = recvcounts + size;
2387   for (i=0; i<size; i++) {
2388     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2389     displs[i]     = A->rmap->range[i]/bs;
2390   }
2391 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2392   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2393 #else
2394   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2395 #endif
2396   /* ---------------------------------------------------------------
2397      Create the sequential matrix of the same type as the local block diagonal
2398   */
2399   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2400   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2401   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2402   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2403   b = (Mat_SeqAIJ *)B->data;
2404 
2405   /*--------------------------------------------------------------------
2406     Copy my part of matrix column indices over
2407   */
2408   sendcount  = ad->nz + bd->nz;
2409   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2410   a_jsendbuf = ad->j;
2411   b_jsendbuf = bd->j;
2412   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2413   cnt        = 0;
2414   for (i=0; i<n; i++) {
2415 
2416     /* put in lower diagonal portion */
2417     m = bd->i[i+1] - bd->i[i];
2418     while (m > 0) {
2419       /* is it above the diagonal (in bd (compressed) numbering)? */
2420       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2421       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2422       m--;
2423     }
2424 
2425     /* put in diagonal portion */
2426     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2427       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2428     }
2429 
2430     /* put in upper diagonal portion */
2431     while (m-- > 0) {
2432       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2433     }
2434   }
2435   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2436 
2437   /*--------------------------------------------------------------------
2438     Gather all column indices to all processors
2439   */
2440   for (i=0; i<size; i++) {
2441     recvcounts[i] = 0;
2442     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2443       recvcounts[i] += lens[j];
2444     }
2445   }
2446   displs[0]  = 0;
2447   for (i=1; i<size; i++) {
2448     displs[i] = displs[i-1] + recvcounts[i-1];
2449   }
2450 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2451   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2452 #else
2453   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2454 #endif
2455   /*--------------------------------------------------------------------
2456     Assemble the matrix into usable form (note: numerical values are not yet set)
2457   */
2458   /* set the b->ilen (length of each row) values */
2459   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2460   /* set the b->i indices */
2461   b->i[0] = 0;
2462   for (i=1; i<=A->rmap->N/bs; i++) {
2463     b->i[i] = b->i[i-1] + lens[i-1];
2464   }
2465   ierr = PetscFree(lens);CHKERRQ(ierr);
2466   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2469 
2470   if (A->symmetric){
2471     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2472   } else if (A->hermitian) {
2473     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2474   } else if (A->structurally_symmetric) {
2475     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2476   }
2477   *newmat = B;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatSOR_MPIBAIJ"
2483 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2484 {
2485   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2486   PetscErrorCode ierr;
2487   Vec            bb1 = 0;
2488 
2489   PetscFunctionBegin;
2490   /* handle SOR_APPLY_UPPER first so the early return cannot leak bb1 */
2491   if (flag == SOR_APPLY_UPPER) {
2492     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2493     PetscFunctionReturn(0);
2494   }
2495 
2496   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2497     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2498   }
2498 
2499   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2500     if (flag & SOR_ZERO_INITIAL_GUESS) {
2501       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2502       its--;
2503     }
2504 
2505     while (its--) {
2506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508 
2509       /* update rhs: bb1 = bb - B*x */
2510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2512 
2513       /* local sweep */
2514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2515     }
2516   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2517     if (flag & SOR_ZERO_INITIAL_GUESS) {
2518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2519       its--;
2520     }
2521     while (its--) {
2522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524 
2525       /* update rhs: bb1 = bb - B*x */
2526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2528 
2529       /* local sweep */
2530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2531     }
2532   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2533     if (flag & SOR_ZERO_INITIAL_GUESS) {
2534       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2535       its--;
2536     }
2537     while (its--) {
2538       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2539       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2540 
2541       /* update rhs: bb1 = bb - B*x */
2542       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2543       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2544 
2545       /* local sweep */
2546       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2547     }
2548   } else {
2549     SETERRQ(PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2550   }
2551 
2552   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2553   PetscFunctionReturn(0);
2554 }
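
/*
   Usage sketch (illustrative, not part of the original source): MatSOR() is usually
   driven by PCSOR rather than called directly; only the processor-local SOR_LOCAL_*
   variants are supported in parallel here. A direct call might look like:

      ierr = MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,1,x);CHKERRQ(ierr);
*/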
2555 
2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2557 
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {
2561        MatSetValues_MPIBAIJ,
2562        MatGetRow_MPIBAIJ,
2563        MatRestoreRow_MPIBAIJ,
2564        MatMult_MPIBAIJ,
2565 /* 4*/ MatMultAdd_MPIBAIJ,
2566        MatMultTranspose_MPIBAIJ,
2567        MatMultTransposeAdd_MPIBAIJ,
2568        0,
2569        0,
2570        0,
2571 /*10*/ 0,
2572        0,
2573        0,
2574        MatSOR_MPIBAIJ,
2575        MatTranspose_MPIBAIJ,
2576 /*15*/ MatGetInfo_MPIBAIJ,
2577        MatEqual_MPIBAIJ,
2578        MatGetDiagonal_MPIBAIJ,
2579        MatDiagonalScale_MPIBAIJ,
2580        MatNorm_MPIBAIJ,
2581 /*20*/ MatAssemblyBegin_MPIBAIJ,
2582        MatAssemblyEnd_MPIBAIJ,
2583        MatSetOption_MPIBAIJ,
2584        MatZeroEntries_MPIBAIJ,
2585 /*24*/ MatZeroRows_MPIBAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2591        0,
2592        0,
2593        0,
2594        0,
2595 /*34*/ MatDuplicate_MPIBAIJ,
2596        0,
2597        0,
2598        0,
2599        0,
2600 /*39*/ MatAXPY_MPIBAIJ,
2601        MatGetSubMatrices_MPIBAIJ,
2602        MatIncreaseOverlap_MPIBAIJ,
2603        MatGetValues_MPIBAIJ,
2604        MatCopy_MPIBAIJ,
2605 /*44*/ 0,
2606        MatScale_MPIBAIJ,
2607        0,
2608        0,
2609        0,
2610 /*49*/ MatSetBlockSize_MPIBAIJ,
2611        0,
2612        0,
2613        0,
2614        0,
2615 /*54*/ MatFDColoringCreate_MPIBAIJ,
2616        0,
2617        MatSetUnfactored_MPIBAIJ,
2618        MatPermute_MPIBAIJ,
2619        MatSetValuesBlocked_MPIBAIJ,
2620 /*59*/ MatGetSubMatrix_MPIBAIJ,
2621        MatDestroy_MPIBAIJ,
2622        MatView_MPIBAIJ,
2623        0,
2624        0,
2625 /*64*/ 0,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*74*/ 0,
2636        MatFDColoringApply_BAIJ,
2637        0,
2638        0,
2639        0,
2640 /*79*/ 0,
2641        0,
2642        0,
2643        0,
2644        MatLoad_MPIBAIJ,
2645 /*84*/ 0,
2646        0,
2647        0,
2648        0,
2649        0,
2650 /*89*/ 0,
2651        0,
2652        0,
2653        0,
2654        0,
2655 /*94*/ 0,
2656        0,
2657        0,
2658        0,
2659        0,
2660 /*99*/ 0,
2661        0,
2662        0,
2663        0,
2664        0,
2665 /*104*/0,
2666        MatRealPart_MPIBAIJ,
2667        MatImaginaryPart_MPIBAIJ,
2668        0,
2669        0,
2670 /*109*/0,
2671        0,
2672        0,
2673        0,
2674        0,
2675 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2676        0,
2677        MatGetGhosts_MPIBAIJ
2678 };
2679 
2680 EXTERN_C_BEGIN
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2684 {
2685   PetscFunctionBegin;
2686   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2687   *iscopy = PETSC_FALSE;
2688   PetscFunctionReturn(0);
2689 }
2690 EXTERN_C_END
2691 
2692 EXTERN_C_BEGIN
2693 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2694 EXTERN_C_END
2695 
2696 EXTERN_C_BEGIN
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2700 {
2701   PetscInt       m,rstart,cstart,cend;
2702   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2703   const PetscInt *JJ=0;
2704   PetscScalar    *values=0;
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708 
2709   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2710   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2712   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2713   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2714   m      = B->rmap->n/bs;
2715   rstart = B->rmap->rstart/bs;
2716   cstart = B->cmap->rstart/bs;
2717   cend   = B->cmap->rend/bs;
2718 
2719   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2720   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2721   for (i=0; i<m; i++) {
2722     nz = ii[i+1] - ii[i];
2723     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2724     nz_max = PetscMax(nz_max,nz);
2725     JJ  = jj + ii[i];
2726     for (j=0; j<nz; j++) {
2727       if (*JJ >= cstart) break;
2728       JJ++;
2729     }
2730     d = 0;
2731     for (; j<nz; j++) {
2732       if (*JJ++ >= cend) break;
2733       d++;
2734     }
2735     d_nnz[i] = d;
2736     o_nnz[i] = nz - d;
2737   }
2738   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2739   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2740 
2741   values = (PetscScalar*)V;
2742   if (!values) {
2743     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2744     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2745   }
2746   for (i=0; i<m; i++) {
2747     PetscInt          row    = i + rstart;
2748     PetscInt          ncols  = ii[i+1] - ii[i];
2749     const PetscInt    *icols = jj + ii[i];
2750     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2751     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2752   }
2753 
2754   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2755   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757 
2758   PetscFunctionReturn(0);
2759 }
2760 EXTERN_C_END
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2764 /*@C
2765    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2766    (block compressed row).
2767 
2768    Collective on MPI_Comm
2769 
2770    Input Parameters:
2771 +  B - the matrix
.  bs - the size of the dense blocks the matrix is composed of
2772 .  i - the indices into j for the start of each local row (starts with zero)
2773 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2774 -  v - optional values in the matrix
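
   Example (an illustrative sketch with hypothetical values; bs = 2, two local
   block rows, sorted global block column indices per row):
.vb
      PetscInt    i[] = {0,2,3};   /* row 0 has 2 block entries, row 1 has 1 */
      PetscInt    j[] = {0,2,1};   /* global block column indices            */
      PetscScalar v[3*2*2];        /* optional: bs*bs values per block entry */
      ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve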
2775 
2776    Level: developer
2777 
2778 .keywords: matrix, baij, block compressed row, sparse, parallel
2779 
2780 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2781 @*/
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2783 {
2784   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2788   if (f) {
2789     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 EXTERN_C_BEGIN
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2798 {
2799   Mat_MPIBAIJ    *b;
2800   PetscErrorCode ierr;
2801   PetscInt       i, newbs = PetscAbs(bs);
2802 
2803   PetscFunctionBegin;
2804   if (bs < 0) {
2805     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2806       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2807     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808     bs   = PetscAbs(bs);
2809   }
2810   if ((d_nnz || o_nnz) && newbs != bs) {
2811     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2812   }
2813   bs = newbs;
2814 
2815 
2816   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2817   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2818   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2819   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2820   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2821 
2822   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826 
2827   if (d_nnz) {
2828     for (i=0; i<B->rmap->n/bs; i++) {
2829       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2830     }
2831   }
2832   if (o_nnz) {
2833     for (i=0; i<B->rmap->n/bs; i++) {
2834       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2835     }
2836   }
2837 
2838   b = (Mat_MPIBAIJ*)B->data;
2839   b->bs2 = bs*bs;
2840   b->mbs = B->rmap->n/bs;
2841   b->nbs = B->cmap->n/bs;
2842   b->Mbs = B->rmap->N/bs;
2843   b->Nbs = B->cmap->N/bs;
2844 
2845   for (i=0; i<=b->size; i++) {
2846     b->rangebs[i] = B->rmap->range[i]/bs;
2847   }
2848   b->rstartbs = B->rmap->rstart/bs;
2849   b->rendbs   = B->rmap->rend/bs;
2850   b->cstartbs = B->cmap->rstart/bs;
2851   b->cendbs   = B->cmap->rend/bs;
2852 
2853   if (!B->preallocated) {
2854     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2855     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2856     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2857     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2858     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2859     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2860     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2861     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2862     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2863   }
2864 
2865   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2866   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2867   B->preallocated = PETSC_TRUE;
2868   PetscFunctionReturn(0);
2869 }
2870 EXTERN_C_END
2871 
2872 EXTERN_C_BEGIN
2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2875 EXTERN_C_END
2876 
2877 
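     /* Converts the block structure of an MPIBAIJ matrix to an MPIAdj adjacency graph: one
        vertex per block row, with the global block column indices of the off-diagonal and
        diagonal parts merged in sorted order; diagonal entries are dropped, as required by
        MATMPIADJ. */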
2878 EXTERN_C_BEGIN
2879 #undef __FUNCT__
2880 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2881 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2882 {
2883   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2884   PetscErrorCode ierr;
2885   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2886   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2887   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2888 
2889   PetscFunctionBegin;
2890   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2891   ii[0] = 0;
2892   CHKMEMQ;
2893   for (i=0; i<M; i++) {
2894     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2895     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2896     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2897     /* remove one from the count if the row contains a diagonal entry, since the adjacency matrix stores no diagonal */
2898     for (j=id[i]; j<id[i+1]; j++) {
2899       if (jd[j] == i) {ii[i+1]--;break;}
2900     }
2901     CHKMEMQ;
2902   }
2903   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2904   cnt = 0;
2905   for (i=0; i<M; i++) {
2906     for (j=io[i]; j<io[i+1]; j++) {
2907       if (garray[jo[j]] > rstart) break;
2908       jj[cnt++] = garray[jo[j]];
2909       CHKMEMQ;
2910     }
2911     for (k=id[i]; k<id[i+1]; k++) {
2912       if (jd[k] != i) {
2913         jj[cnt++] = rstart + jd[k];
2914         CHKMEMQ;
2915       }
2916     }
2917     for (;j<io[i+1]; j++) {
2918       jj[cnt++] = garray[jo[j]];
2919       CHKMEMQ;
2920     }
2921   }
2922   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 EXTERN_C_END
2926 
2927 /*MC
2928    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2929 
2930    Options Database Keys:
2931 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2932 . -mat_block_size <bs> - set the blocksize used to store the matrix
2933 - -mat_use_hash_table <fact> - use a hash table during assembly; fact is the factor used to size the table
2934 
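       Example:
       A typical options-driven sketch (the option values are illustrative):
     .vb
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* e.g. run with -mat_type mpibaij -mat_block_size 3 */
     .ve
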
2935   Level: beginner
2936 
2937 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation()
2938 M*/
2939 
2940 EXTERN_C_BEGIN
2941 #undef __FUNCT__
2942 #define __FUNCT__ "MatCreate_MPIBAIJ"
2943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2944 {
2945   Mat_MPIBAIJ    *b;
2946   PetscErrorCode ierr;
2947   PetscTruth     flg;
2948 
2949   PetscFunctionBegin;
2950   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2951   B->data = (void*)b;
2952 
2953 
2954   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2955   B->mapping    = 0;
2956   B->assembled  = PETSC_FALSE;
2957 
2958   B->insertmode = NOT_SET_VALUES;
2959   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2960   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2961 
2962   /* build local table of row and column ownerships */
2963   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2964 
2965   /* build cache for off array entries formed */
2966   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2967   b->donotstash  = PETSC_FALSE;
2968   b->colmap      = PETSC_NULL;
2969   b->garray      = PETSC_NULL;
2970   b->roworiented = PETSC_TRUE;
2971 
2972   /* stuff used in block assembly */
2973   b->barray       = 0;
2974 
2975   /* stuff used for matrix vector multiply */
2976   b->lvec         = 0;
2977   b->Mvctx        = 0;
2978 
2979   /* stuff for MatGetRow() */
2980   b->rowindices   = 0;
2981   b->rowvalues    = 0;
2982   b->getrowactive = PETSC_FALSE;
2983 
2984   /* hash table stuff */
2985   b->ht           = 0;
2986   b->hd           = 0;
2987   b->ht_size      = 0;
2988   b->ht_flag      = PETSC_FALSE;
2989   b->ht_fact      = 0;
2990   b->ht_total_ct  = 0;
2991   b->ht_insert_ct = 0;
2992 
2993   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2994     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2995     if (flg) {
2996       PetscReal fact = 1.39;
2997       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2998       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2999       if (fact <= 1.0) fact = 1.39;
3000       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3001       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3002     }
3003   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3004 
3005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3006                                      "MatConvert_MPIBAIJ_MPIAdj",
3007                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3009                                      "MatStoreValues_MPIBAIJ",
3010                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3012                                      "MatRetrieveValues_MPIBAIJ",
3013                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3015                                      "MatGetDiagonalBlock_MPIBAIJ",
3016                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3018                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3019                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3021                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3022                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3024                                      "MatDiagonalScaleLocal_MPIBAIJ",
3025                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3027                                      "MatSetHashTableFactor_MPIBAIJ",
3028                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3029   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 EXTERN_C_END
3033 
3034 /*MC
3035    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3036 
3037    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3038    and MATMPIBAIJ otherwise.
3039 
3040    Options Database Keys:
3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3042 
3043   Level: beginner
3044 
3045 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3046 M*/
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatCreate_BAIJ"
3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3052 {
3053   PetscErrorCode ierr;
3054   PetscMPIInt    size;
3055 
3056   PetscFunctionBegin;
3057   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3058   if (size == 1) {
3059     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3060   } else {
3061     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3062   }
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3069 /*@C
3070    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3071    (block compressed row).  For good matrix assembly performance
3072    the user should preallocate the matrix storage by setting the parameters
3073    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3074    performance can be increased by more than a factor of 50.
3075 
3076    Collective on Mat
3077 
3078    Input Parameters:
3079 +  A - the matrix
3080 .  bs   - size of block
3081 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3082            submatrix  (same for all local rows)
3083 .  d_nnz - array containing the number of block nonzeros in the various block rows
3084            of the diagonal portion of the local submatrix (possibly different for each block
3085            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3086 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3087            submatrix (same for all local rows).
3088 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3089            off-diagonal portion of the local submatrix (possibly different for
3090            each block row) or PETSC_NULL.
3091 
3092    If the *_nnz parameter is given then the *_nz parameter is ignored
3093 
3094    Options Database Keys:
3095 +   -mat_block_size - size of the blocks to use
3096 -   -mat_use_hash_table <fact> - use a hash table during assembly; fact is the factor used to size the table
3097 
3098    Notes:
3099    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3100    then it must be used on all processors that share the object for that argument.
3101 
3102    Storage Information:
3103    For a square global matrix we define each processor's diagonal portion
3104    to be its local rows and the corresponding columns (a square submatrix);
3105    each processor's off-diagonal portion encompasses the remainder of the
3106    local matrix (a rectangular submatrix).
3107 
3108    The user can specify preallocated storage for the diagonal part of
3109    the local submatrix with either d_nz or d_nnz (not both).  Set
3110    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3111    memory allocation.  Likewise, specify preallocated storage for the
3112    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3113 
3114    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3115    the figure below we depict these three local rows and all columns (0-11).
3116 
3117 .vb
3118            0 1 2 3 4 5 6 7 8 9 10 11
3119           -------------------
3120    row 3  |  o o o d d d o o o o o o
3121    row 4  |  o o o d d d o o o o o o
3122    row 5  |  o o o d d d o o o o o o
3123           -------------------
3124 .ve
3125 
3126    Thus, any entries in the d locations are stored in the d (diagonal)
3127    submatrix, and any entries in the o locations are stored in the
3128    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3129    stored simply in the MATSEQBAIJ format for compressed row storage.
3130 
3131    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3132    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3133    In general, for PDE problems in which most nonzeros are near the diagonal,
3134    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3135    or you will get TERRIBLE performance; see the users' manual chapter on
3136    matrices.
3137 
3138    You can call MatGetInfo() to get information on how effective the preallocation was;
3139    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3140    You can also run with the option -info and look for messages with the string
3141    malloc in them to see if additional memory allocation was needed.
3142 
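        Example:
        A minimal sketch (the block size and per-block-row estimates are illustrative):
     .vb
        ierr = MatCreate(comm,&B);CHKERRQ(ierr);
        ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
        ierr = MatMPIBAIJSetPreallocation(B,3,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .ve
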
3143    Level: intermediate
3144 
3145 .keywords: matrix, block, aij, compressed row, sparse, parallel
3146 
3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3148 @*/
3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3150 {
3151   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3152 
3153   PetscFunctionBegin;
3154   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3155   if (f) {
3156     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatCreateMPIBAIJ"
3163 /*@C
3164    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3165    (block compressed row).  For good matrix assembly performance
3166    the user should preallocate the matrix storage by setting the parameters
3167    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3168    performance can be increased by more than a factor of 50.
3169 
3170    Collective on MPI_Comm
3171 
3172    Input Parameters:
3173 +  comm - MPI communicator
3174 .  bs   - size of block
3175 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3176            This value should be the same as the local size used in creating the
3177            y vector for the matrix-vector product y = Ax.
3178 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3179            This value should be the same as the local size used in creating the
3180            x vector for the matrix-vector product y = Ax.
3181 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3182 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3183 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3184            submatrix  (same for all local rows)
3185 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3186            of the diagonal portion of the local submatrix (possibly different for each block
3187            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3188 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3189            submatrix (same for all local rows).
3190 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3191            off-diagonal portion of the local submatrix (possibly different for
3192            each block row) or PETSC_NULL.
3193 
3194    Output Parameter:
3195 .  A - the matrix
3196 
3197    Options Database Keys:
3198 +   -mat_block_size - size of the blocks to use
3199 -   -mat_use_hash_table <fact> - use a hash table during assembly; fact is the factor used to size the table
3200 
3201    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3202    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3203    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3204 
3205    Notes:
3206    If the *_nnz parameter is given then the *_nz parameter is ignored
3207 
3208    A nonzero block is any block that has 1 or more nonzeros in it
3209 
3210    The user MUST specify either the local or global matrix dimensions
3211    (possibly both).
3212 
3213    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3214    then it must be used on all processors that share the object for that argument.
3215 
3216    Storage Information:
3217    For a square global matrix we define each processor's diagonal portion
3218    to be its local rows and the corresponding columns (a square submatrix);
3219    each processor's off-diagonal portion encompasses the remainder of the
3220    local matrix (a rectangular submatrix).
3221 
3222    The user can specify preallocated storage for the diagonal part of
3223    the local submatrix with either d_nz or d_nnz (not both).  Set
3224    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3225    memory allocation.  Likewise, specify preallocated storage for the
3226    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3227 
3228    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3229    the figure below we depict these three local rows and all columns (0-11).
3230 
3231 .vb
3232            0 1 2 3 4 5 6 7 8 9 10 11
3233           -------------------
3234    row 3  |  o o o d d d o o o o o o
3235    row 4  |  o o o d d d o o o o o o
3236    row 5  |  o o o d d d o o o o o o
3237           -------------------
3238 .ve
3239 
3240    Thus, any entries in the d locations are stored in the d (diagonal)
3241    submatrix, and any entries in the o locations are stored in the
3242    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3243    stored simply in the MATSEQBAIJ format for compressed row storage.
3244 
3245    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3246    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3247    In general, for PDE problems in which most nonzeros are near the diagonal,
3248    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3249    or you will get TERRIBLE performance; see the users' manual chapter on
3250    matrices.
3251 
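        Example:
        A minimal sketch (the block size and per-block-row estimates are illustrative):
     .vb
        ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     .ve
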
3252    Level: intermediate
3253 
3254 .keywords: matrix, block, aij, compressed row, sparse, parallel
3255 
3256 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3257 @*/
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3259 {
3260   PetscErrorCode ierr;
3261   PetscMPIInt    size;
3262 
3263   PetscFunctionBegin;
3264   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3265   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3266   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3267   if (size > 1) {
3268     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3269     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3270   } else {
3271     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3272     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3273   }
3274   PetscFunctionReturn(0);
3275 }
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3279 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3280 {
3281   Mat            mat;
3282   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3283   PetscErrorCode ierr;
3284   PetscInt       len=0;
3285 
3286   PetscFunctionBegin;
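       /* copy the layout and parallel support structures (colmap, garray, lvec, Mvctx) and
          duplicate the sequential diagonal and off-diagonal parts; the assembly hash table
          is not copied and will be rebuilt on demand */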
3287   *newmat       = 0;
3288   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3289   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3290   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3291   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3292 
3293   mat->factor       = matin->factor;
3294   mat->preallocated = PETSC_TRUE;
3295   mat->assembled    = PETSC_TRUE;
3296   mat->insertmode   = NOT_SET_VALUES;
3297 
3298   a      = (Mat_MPIBAIJ*)mat->data;
3299   mat->rmap->bs  = matin->rmap->bs;
3300   a->bs2   = oldmat->bs2;
3301   a->mbs   = oldmat->mbs;
3302   a->nbs   = oldmat->nbs;
3303   a->Mbs   = oldmat->Mbs;
3304   a->Nbs   = oldmat->Nbs;
3305 
3306   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3308 
3309   a->size         = oldmat->size;
3310   a->rank         = oldmat->rank;
3311   a->donotstash   = oldmat->donotstash;
3312   a->roworiented  = oldmat->roworiented;
3313   a->rowindices   = 0;
3314   a->rowvalues    = 0;
3315   a->getrowactive = PETSC_FALSE;
3316   a->barray       = 0;
3317   a->rstartbs     = oldmat->rstartbs;
3318   a->rendbs       = oldmat->rendbs;
3319   a->cstartbs     = oldmat->cstartbs;
3320   a->cendbs       = oldmat->cendbs;
3321 
3322   /* hash table stuff */
3323   a->ht           = 0;
3324   a->hd           = 0;
3325   a->ht_size      = 0;
3326   a->ht_flag      = oldmat->ht_flag;
3327   a->ht_fact      = oldmat->ht_fact;
3328   a->ht_total_ct  = 0;
3329   a->ht_insert_ct = 0;
3330 
3331   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3332   if (oldmat->colmap) {
3333 #if defined (PETSC_USE_CTABLE)
3334   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3335 #else
3336   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3337   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3338   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3339 #endif
3340   } else a->colmap = 0;
3341 
3342   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3343     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3344     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3345     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3346   } else a->garray = 0;
3347 
3348   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3349   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3351   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3352   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3353 
3354   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3355   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3356   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3357   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3358   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3359   *newmat = mat;
3360 
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 #include "petscsys.h"
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatLoad_MPIBAIJ"
3368 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3369 {
3370   Mat            A;
3371   PetscErrorCode ierr;
3372   int            fd;
3373   PetscInt       i,nz,j,rstart,rend;
3374   PetscScalar    *vals,*buf;
3375   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3376   MPI_Status     status;
3377   PetscMPIInt    rank,size,maxnz;
3378   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3379   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3380   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3381   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3382   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3383   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3384 
3385   PetscFunctionBegin;
3386   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3387     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3388   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3389 
3390   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3391   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3392   if (!rank) {
3393     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3394     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3395     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3396   }
3397 
3398   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3399   M = header[1]; N = header[2];
3400 
3401   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3402 
3403   /*
3404      This code adds extra rows to make sure the number of rows is
3405      divisible by the blocksize
3406   */
3407   Mbs        = M/bs;
3408   extra_rows = bs - M + bs*Mbs;
3409   if (extra_rows == bs) extra_rows = 0;
3410   else                  Mbs++;
3411   if (extra_rows && !rank) {
3412     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3413   }
3414 
3415   /* determine ownership of all rows */
3416   mbs        = Mbs/size + ((Mbs % size) > rank);
3417   m          = mbs*bs;
3418   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3419   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3420 
3421   /* process 0 needs enough room for process with most rows */
3422   if (!rank) {
3423     mmax = rowners[1];
3424     for (i=2; i<size; i++) {
3425       mmax = PetscMax(mmax,rowners[i]);
3426     }
3427     mmax*=bs;
3428   } else mmax = m;
3429 
3430   rowners[0] = 0;
3431   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3432   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3433   rstart = rowners[rank];
3434   rend   = rowners[rank+1];
3435 
3436   /* distribute row lengths to all processors */
3437   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3438   if (!rank) {
3439     mend = m;
3440     if (size == 1) mend = mend - extra_rows;
3441     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3442     for (j=mend; j<m; j++) locrowlens[j] = 1;
3443     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3444     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3445     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3446     for (j=0; j<m; j++) {
3447       procsnz[0] += locrowlens[j];
3448     }
3449     for (i=1; i<size; i++) {
3450       mend = browners[i+1] - browners[i];
3451       if (i == size-1) mend = mend - extra_rows;
3452       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3453       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3454       /* calculate the number of nonzeros on each processor */
3455       for (j=0; j<browners[i+1]-browners[i]; j++) {
3456         procsnz[i] += rowlengths[j];
3457       }
3458       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3459     }
3460     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3461   } else {
3462     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3463   }
3464 
3465   if (!rank) {
3466     /* determine max buffer needed and allocate it */
3467     maxnz = procsnz[0];
3468     for (i=1; i<size; i++) {
3469       maxnz = PetscMax(maxnz,procsnz[i]);
3470     }
3471     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3472 
3473     /* read in my part of the matrix column indices  */
3474     nz     = procsnz[0];
3475     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3476     mycols = ibuf;
3477     if (size == 1)  nz -= extra_rows;
3478     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3479     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3480 
3481     /* read in the column indices for all other processors (except the last) and ship them off */
3482     for (i=1; i<size-1; i++) {
3483       nz   = procsnz[i];
3484       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3485       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3486     }
3487     /* read in the stuff for the last proc */
3488     if (size != 1) {
3489       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3490       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3491       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3492       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3493     }
3494     ierr = PetscFree(cols);CHKERRQ(ierr);
3495   } else {
3496     /* determine buffer space needed for message */
3497     nz = 0;
3498     for (i=0; i<m; i++) {
3499       nz += locrowlens[i];
3500     }
3501     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3502     mycols = ibuf;
3503     /* receive message of column indices */
3504     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3505     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3506     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3507   }
3508 
3509   /* loop over local block rows, determining the number of diagonal and off-diagonal block entries */
3510   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3511   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3512   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3513   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3514   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
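       /* mask[] flags block columns already counted in the current block row; masked1[] and
          masked2[] record which flags to clear, split into diagonal-block and
          off-diagonal-block columns */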
3515   rowcount = 0; nzcount = 0;
3516   for (i=0; i<mbs; i++) {
3517     dcount  = 0;
3518     odcount = 0;
3519     for (j=0; j<bs; j++) {
3520       kmax = locrowlens[rowcount];
3521       for (k=0; k<kmax; k++) {
3522         tmp = mycols[nzcount++]/bs;
3523         if (!mask[tmp]) {
3524           mask[tmp] = 1;
3525           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3526           else masked1[dcount++] = tmp;
3527         }
3528       }
3529       rowcount++;
3530     }
3531 
3532     dlens[i]  = dcount;
3533     odlens[i] = odcount;
3534 
3535     /* zero out the mask elements we set */
3536     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3537     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3538   }
3539 
3540   /* create our matrix */
3541   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3542   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3543   ierr = MatSetType(A,type);CHKERRQ(ierr);
3544   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3545 
3546   if (!rank) {
3547     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3548     /* read in my part of the matrix numerical values  */
3549     nz = procsnz[0];
3550     vals = buf;
3551     mycols = ibuf;
3552     if (size == 1)  nz -= extra_rows;
3553     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3554     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3555 
3556     /* insert into matrix */
3557     jj      = rstart*bs;
3558     for (i=0; i<m; i++) {
3559       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3560       mycols += locrowlens[i];
3561       vals   += locrowlens[i];
3562       jj++;
3563     }
3564     /* read in other processors (except the last one) and ship out */
3565     for (i=1; i<size-1; i++) {
3566       nz   = procsnz[i];
3567       vals = buf;
3568       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3569       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3570     }
3571     /* the last proc */
3572     if (size != 1){
3573       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3574       vals = buf;
3575       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3576       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3577       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3578     }
3579     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3580   } else {
3581     /* receive numeric values */
3582     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3583 
3584     /* receive message of values */
3585     vals   = buf;
3586     mycols = ibuf;
3587     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3588     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3589     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3590 
3591     /* insert into matrix */
3592     jj      = rstart*bs;
3593     for (i=0; i<m; i++) {
3594       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3595       mycols += locrowlens[i];
3596       vals   += locrowlens[i];
3597       jj++;
3598     }
3599   }
3600   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3601   ierr = PetscFree(buf);CHKERRQ(ierr);
3602   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3603   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3604   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3605   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3606   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3608 
3609   *newmat = A;
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 #undef __FUNCT__
3614 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3615 /*@
3616    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3617 
3618    Input Parameters:
3619    Collective on Mat
3620 
3621    Input Parameters:
3622 +  mat  - the matrix
3623 -  fact - the factor used in computing the hash table size
3624    Level: advanced
3625 
3626   Notes:
3627    This can also be set by the command line option: -mat_use_hash_table <fact>
3628 
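        Example:
        A minimal sketch (the factor value is illustrative):
     .vb
        ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
     .ve
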
3629 .keywords: matrix, hashtable, factor, HT
3630 
3631 .seealso: MatSetOption()
3632 @*/
3633 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3634 {
3635   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3636 
3637   PetscFunctionBegin;
3638   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3639   if (f) {
3640     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3641   }
3642   PetscFunctionReturn(0);
3643 }
3644 
3645 EXTERN_C_BEGIN
3646 #undef __FUNCT__
3647 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3648 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3649 {
3650   Mat_MPIBAIJ *baij;
3651 
3652   PetscFunctionBegin;
3653   baij = (Mat_MPIBAIJ*)mat->data;
3654   baij->ht_fact = fact;
3655   PetscFunctionReturn(0);
3656 }
3657 EXTERN_C_END
3658 
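     /*
        MatMPIBAIJGetSeqBAIJ - Gives access to the diagonal (Ad) and off-diagonal (Ao)
        SeqBAIJ parts of the parallel matrix, plus the mapping (colmap) from local block
        columns of Ao to global block columns.
     */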
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3661 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3662 {
3663   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3664   PetscFunctionBegin;
3665   *Ad     = a->A;
3666   *Ao     = a->B;
3667   *colmap = a->garray;
3668   PetscFunctionReturn(0);
3669 }
3670 
3671 /*
3672     Special version for direct calls from Fortran (to eliminate two function call overheads)
3673 */
3674 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3675 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3676 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3677 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3678 #endif
3679 
3680 #undef __FUNCT__
3681 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3682 /*@C
3683   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3684 
3685   Collective on Mat
3686 
3687   Input Parameters:
3688 + mat - the matrix
3689 . min - number of input rows
3690 . im - input rows
3691 . nin - number of input columns
3692 . in - input columns
3693 . v - numerical values input
3694 - addvin - INSERT_VALUES or ADD_VALUES
3695 
3696   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3697   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
3698   Level: advanced
3699 
3700 .seealso:   MatSetValuesBlocked()
3701 @*/
3702 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3703 {
3704   /* convert input arguments to C version */
3705   Mat             mat = *matin;
3706   PetscInt        m = *min, n = *nin;
3707   InsertMode      addv = *addvin;
3708 
3709   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3710   const MatScalar *value;
3711   MatScalar       *barray=baij->barray;
3712   PetscTruth      roworiented = baij->roworiented;
3713   PetscErrorCode  ierr;
3714   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3715   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3716   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3717 
3718   PetscFunctionBegin;
3719   /* tasks normally handled by MatSetValuesBlocked() */
3720   if (mat->insertmode == NOT_SET_VALUES) {
3721     mat->insertmode = addv;
3722   }
3723 #if defined(PETSC_USE_DEBUG)
3724   else if (mat->insertmode != addv) {
3725     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3726   }
3727   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3728 #endif
3729   if (mat->assembled) {
3730     mat->was_assembled = PETSC_TRUE;
3731     mat->assembled     = PETSC_FALSE;
3732   }
3733   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3734 
3735 
3736   if (!barray) {
3737     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3738     baij->barray = barray;
3739   }
3740 
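       /* after copying one bs-long row (or column) of a block out of v, skip stepval
          scalars to reach the same block's next row (or column) */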
3741   if (roworiented) {
3742     stepval = (n-1)*bs;
3743   } else {
3744     stepval = (m-1)*bs;
3745   }
3746   for (i=0; i<m; i++) {
3747     if (im[i] < 0) continue;
3748 #if defined(PETSC_USE_DEBUG)
3749     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3750 #endif
3751     if (im[i] >= rstart && im[i] < rend) {
3752       row = im[i] - rstart;
3753       for (j=0; j<n; j++) {
3754         /* if there is only one block column (row-oriented) or one block row (column-oriented), v can be used in place without a copy */
3755         if ((roworiented) && (n == 1)) {
3756           barray = (MatScalar*)v + i*bs2;
3757         } else if((!roworiented) && (m == 1)) {
3758           barray = (MatScalar*)v + j*bs2;
3759         } else { /* Here a copy is required */
3760           if (roworiented) {
3761             value = v + i*(stepval+bs)*bs + j*bs;
3762           } else {
3763             value = v + j*(stepval+bs)*bs + i*bs;
3764           }
3765           for (ii=0; ii<bs; ii++,value+=stepval) {
3766             for (jj=0; jj<bs; jj++) {
3767               *barray++  = *value++;
3768             }
3769           }
3770           barray -=bs2;
3771         }
3772 
3773         if (in[j] >= cstart && in[j] < cend){
3774           col  = in[j] - cstart;
3775           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3776         }
3777         else if (in[j] < 0) continue;
3778 #if defined(PETSC_USE_DEBUG)
3779         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3780 #endif
3781         else {
3782           if (mat->was_assembled) {
3783             if (!baij->colmap) {
3784               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3785             }
3786 
3787 #if defined(PETSC_USE_DEBUG)
3788 #if defined (PETSC_USE_CTABLE)
3789             { PetscInt data;
3790               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3791               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3792             }
3793 #else
3794             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3795 #endif
3796 #endif
3797 #if defined (PETSC_USE_CTABLE)
3798             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3799             col  = (col - 1)/bs;
3800 #else
3801             col = (baij->colmap[in[j]] - 1)/bs;
3802 #endif
3803             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3804               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3805               col =  in[j];
3806             }
3807           }
3808           else col = in[j];
3809           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3810         }
3811       }
3812     } else {
3813       if (!baij->donotstash) {
3814         if (roworiented) {
3815           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3816         } else {
3817           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3818         }
3819       }
3820     }
3821   }
3822 
3823   /* task normally handled by MatSetValuesBlocked() */
3824   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3825   PetscFunctionReturn(0);
3826 }
3827