xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f533fdf2fac09d161b4cb34a79e3a3bdb8821e2d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the functions ..._MatScalar() do not exist;
26    the #defines below simply alias them to the PetscScalar versions. Otherwise
27    ..._MatScalar() takes matrix elements already in single precision and inserts
28    them into the single precision data structures, while MatSetValuesBlocked_MPISBAIJ()
29    converts the entries to single precision and then calls ..._MatScalar() to store them.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
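/*
   Illustrative sketch (not library code): with PETSC_USE_MAT_SINGLE the public
   entry points receive PetscScalar (double precision) values, copy them into a
   MatScalar (single precision) scratch buffer, and dispatch to the ..._MatScalar
   back end; BackEnd_MatScalar below is a stand-in name, roughly

       for (i=0; i<N; i++) vsingle[i] = v[i];            narrow double -> float
       BackEnd_MatScalar(mat,m,im,n,in,vsingle,addv);    insert in storage precision

   Without PETSC_USE_MAT_SINGLE the #defines above make the two names identical
   and no copy takes place.
*/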
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
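/*
   Both macros above implement the same insertion kernel: locate block column
   bcol within block row brow by a short binary search followed by a linear
   scan, then either update the existing bs x bs block in place or shift the
   tail of the row up one block to make room.  A minimal sketch of the search
   (illustrative only):

       low = 0; high = nrow;
       while (high - low > 3) {                  binary search phase
         t = (low + high)/2;
         if (rp[t] > bcol) high = t; else low = t;
       }
       for (i=low; i<high; i++) {                linear scan phase
         if (rp[i] == bcol) break;               found: update block i in place
       }
*/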
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
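/*
   Caller-side sketch (assumed typical usage, not taken from this file): only
   entries on or above the block diagonal need to be supplied; entries below it
   are silently dropped by the routine above.

       Mat         S;
       PetscInt    i = 0, j = 2;
       PetscScalar v = 5.0;
       ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,
                                10,10,3,PETSC_NULL,1,PETSC_NULL,&S);CHKERRQ(ierr);
       ierr = MatSetValues(S,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);    kept: i <= j
       ierr = MatSetValues(S,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);    ignored: i > j
       ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/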
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If n == 1 (row-oriented) or m == 1 (column-oriented) a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
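/*
   Blocked-insertion sketch (illustrative; assumes a matrix S created with block
   size bs = 2): indices are in block units and v supplies bs*bs values per
   block, interpreted according to the roworiented flag.

       PetscInt    ib = 0, jb = 1;
       PetscScalar vblock[4] = {1.0,2.0,3.0,4.0};        one 2x2 block
       ierr = MatSetValuesBlocked(S,1,&ib,1,&jb,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/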
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
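/*
   Because only the upper triangle is stored, the Frobenius branch above
   assembles the norm as sqrt(||A||_F^2 + 2*||B||_F^2): each off-diagonal block
   held in B represents two entries of the symmetric matrix.  Typical usage
   (illustrative):

       PetscReal nrm;
       ierr = MatNorm(S,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/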
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
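/*
   The MPI_BOR reduction above is a cheap consistency check. INSERT_VALUES and
   ADD_VALUES occupy distinct bits, so if every process uses the same mode the
   OR equals that mode, while a mix sets both bits and is caught as an error.
   Sketch of the idea (illustrative; comm is the matrix communicator):

       InsertMode mine = INSERT_VALUES, all;
       ierr = MPI_Allreduce(&mine,&all,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
       if (all == (ADD_VALUES|INSERT_VALUES)) ...   mixed modes detected
*/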
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since its values are stashed column-oriented,
608        temporarily set the row-oriented flags to PETSC_FALSE, and restore the
609        original flags after MatSetValues() */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled; if so we must
639      also disassemble ourselves, in order that we may reassemble. */
640   /*
641      if the nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled, thus we can skip this step
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ *Bloc;
720     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar   *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* superdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
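/*
   Summary of the scheme above: with the symmetric decomposition
   y = (A + B + B^T) x, slvec0 holds [x ; B^T x] and slvec1 starts as
   [A x ; 0].  The single forward ADD_VALUES scatter through sMvctx both
   accumulates each process's B^T x contributions into the owning rows of
   slvec1a and gathers the remote x values needed by B into slvec1b, so one
   communication round replaces the separate forward and reverse scatters of
   the _2comm variant below.  The final multadd then forms y = B*slvec1b + slvec1a.
*/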
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do superdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* superdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do superdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   PetscFunctionBegin;
1020   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1029 
1030   PetscFunctionBegin;
1031   if (!baij->getrowactive) {
1032     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1033   }
1034   baij->getrowactive = PETSC_FALSE;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1041 {
1042   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1047   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1060   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1067 {
1068   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1073   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1082   Mat            A = a->A,B = a->B;
1083   PetscErrorCode ierr;
1084   PetscReal      isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = (PetscReal)matin->bs;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else {
1115     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1116   }
1117   info->rows_global       = (PetscReal)matin->M;
1118   info->columns_global    = (PetscReal)matin->N;
1119   info->rows_local        = (PetscReal)matin->m;
1120   info->columns_local     = (PetscReal)matin->N;
1121   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1122   info->fill_ratio_needed = 0;
1123   info->factor_mallocs    = 0;
1124   PetscFunctionReturn(0);
1125 }
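/*
   Usage sketch (illustrative): global counts are obtained by reducing the
   per-process values computed above.

       MatInfo info;
       ierr = MatGetInfo(S,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/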
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1130 {
1131   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   switch (op) {
1136   case MAT_NO_NEW_NONZERO_LOCATIONS:
1137   case MAT_YES_NEW_NONZERO_LOCATIONS:
1138   case MAT_COLUMNS_UNSORTED:
1139   case MAT_COLUMNS_SORTED:
1140   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1141   case MAT_KEEP_ZEROED_ROWS:
1142   case MAT_NEW_NONZERO_LOCATION_ERR:
1143     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1144     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1145     break;
1146   case MAT_ROW_ORIENTED:
1147     a->roworiented = PETSC_TRUE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROWS_SORTED:
1152   case MAT_ROWS_UNSORTED:
1153   case MAT_YES_NEW_DIAGONALS:
1154     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1155     break;
1156   case MAT_COLUMN_ORIENTED:
1157     a->roworiented = PETSC_FALSE;
1158     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1159     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1160     break;
1161   case MAT_IGNORE_OFF_PROC_ENTRIES:
1162     a->donotstash = PETSC_TRUE;
1163     break;
1164   case MAT_NO_NEW_DIAGONALS:
1165     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1166   case MAT_USE_HASH_TABLE:
1167     a->ht_flag = PETSC_TRUE;
1168     break;
1169   case MAT_NOT_SYMMETRIC:
1170   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1171   case MAT_HERMITIAN:
1172     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1173   case MAT_SYMMETRIC:
1174   case MAT_STRUCTURALLY_SYMMETRIC:
1175   case MAT_NOT_HERMITIAN:
1176   case MAT_SYMMETRY_ETERNAL:
1177   case MAT_NOT_SYMMETRY_ETERNAL:
1178     break;
1179   default:
1180     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1188 {
1189   PetscErrorCode ierr;
1190   PetscFunctionBegin;
1191   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1198 {
1199   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1200   Mat            a=baij->A, b=baij->B;
1201   PetscErrorCode ierr;
1202   PetscInt       nv,m,n;
1203   PetscTruth     flg;
1204 
1205   PetscFunctionBegin;
1206   if (ll != rr){
1207     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1208     if (!flg)
1209       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1210   }
1211   if (!ll) PetscFunctionReturn(0);
1212 
1213   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1214   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1215 
1216   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1217   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1218 
1219   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1220 
1221   /* left diagonalscale the off-diagonal part */
1222   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1223 
1224   /* scale the diagonal part */
1225   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1226 
1227   /* right diagonalscale the off-diagonal part */
1228   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1229   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
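/*
   Usage sketch: the symmetric format only supports the congruence scaling
   D*A*D, so both scaling vectors must be the same (illustrative):

       Vec d;                                    diagonal scaling vector
       ierr = MatDiagonalScale(S,d,d);CHKERRQ(ierr);
*/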
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1236 {
1237   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1238   MPI_Comm          comm = A->comm;
1239   static PetscTruth called = PETSC_FALSE;
1240   PetscErrorCode    ierr;
1241 
1242   PetscFunctionBegin;
1243   if (!a->rank) {
1244     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1245   }
1246   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1247   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1248   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1255 {
1256   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatEqual_MPISBAIJ"
1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1269 {
1270   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1271   Mat            a,b,c,d;
1272   PetscTruth     flg;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   a = matA->A; b = matA->B;
1277   c = matB->A; d = matB->B;
1278 
1279   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1280   if (flg) {
1281     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1282   }
1283   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatCopy_MPISBAIJ"
1289 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1290 {
1291   PetscErrorCode ierr;
1292   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1293   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1294 
1295   PetscFunctionBegin;
1296   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1297   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1298     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1299   } else {
1300     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1301     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1308 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1309 {
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #include "petscblaslapack.h"
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1320 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1321 {
1322   PetscErrorCode ierr;
1323   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1324   PetscBLASInt   bnz,one=1;
1325   Mat_SeqSBAIJ   *xa,*ya;
1326   Mat_SeqBAIJ    *xb,*yb;
1327 
1328   PetscFunctionBegin;
1329   if (str == SAME_NONZERO_PATTERN) {
1330     PetscScalar alpha = a;
1331     xa = (Mat_SeqSBAIJ *)xx->A->data;
1332     ya = (Mat_SeqSBAIJ *)yy->A->data;
1333     bnz = (PetscBLASInt)xa->nz;
1334     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1335     xb = (Mat_SeqBAIJ *)xx->B->data;
1336     yb = (Mat_SeqBAIJ *)yy->B->data;
1337     bnz = (PetscBLASInt)xb->nz;
1338     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1339   } else {
1340     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1341   }
1342   PetscFunctionReturn(0);
1343 }
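/*
   Usage sketch: when X and Y share a nonzero pattern the update above reduces
   to two BLAS axpy calls over the stored value arrays (illustrative):

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);    Y = Y + 2*X
*/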
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1347 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1348 {
1349   PetscErrorCode ierr;
1350   PetscInt       i;
1351   PetscTruth     flg;
1352 
1353   PetscFunctionBegin;
1354   for (i=0; i<n; i++) {
1355     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1356     if (!flg) {
1357       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1358     }
1359   }
1360   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 
1365 /* -------------------------------------------------------------------*/
1366 static struct _MatOps MatOps_Values = {
1367        MatSetValues_MPISBAIJ,
1368        MatGetRow_MPISBAIJ,
1369        MatRestoreRow_MPISBAIJ,
1370        MatMult_MPISBAIJ,
1371 /* 4*/ MatMultAdd_MPISBAIJ,
1372        MatMult_MPISBAIJ,       /* transpose versions are the same since the matrix is symmetric */
1373        MatMultAdd_MPISBAIJ,
1374        0,
1375        0,
1376        0,
1377 /*10*/ 0,
1378        0,
1379        0,
1380        MatRelax_MPISBAIJ,
1381        MatTranspose_MPISBAIJ,
1382 /*15*/ MatGetInfo_MPISBAIJ,
1383        MatEqual_MPISBAIJ,
1384        MatGetDiagonal_MPISBAIJ,
1385        MatDiagonalScale_MPISBAIJ,
1386        MatNorm_MPISBAIJ,
1387 /*20*/ MatAssemblyBegin_MPISBAIJ,
1388        MatAssemblyEnd_MPISBAIJ,
1389        0,
1390        MatSetOption_MPISBAIJ,
1391        MatZeroEntries_MPISBAIJ,
1392 /*25*/ 0,
1393        0,
1394        0,
1395        0,
1396        0,
1397 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*35*/ MatDuplicate_MPISBAIJ,
1403        0,
1404        0,
1405        0,
1406        0,
1407 /*40*/ MatAXPY_MPISBAIJ,
1408        MatGetSubMatrices_MPISBAIJ,
1409        MatIncreaseOverlap_MPISBAIJ,
1410        MatGetValues_MPISBAIJ,
1411        MatCopy_MPISBAIJ,
1412 /*45*/ MatPrintHelp_MPISBAIJ,
1413        MatScale_MPISBAIJ,
1414        0,
1415        0,
1416        0,
1417 /*50*/ 0,
1418        0,
1419        0,
1420        0,
1421        0,
1422 /*55*/ 0,
1423        0,
1424        MatSetUnfactored_MPISBAIJ,
1425        0,
1426        MatSetValuesBlocked_MPISBAIJ,
1427 /*60*/ 0,
1428        0,
1429        0,
1430        MatGetPetscMaps_Petsc,
1431        0,
1432 /*65*/ 0,
1433        0,
1434        0,
1435        0,
1436        0,
1437 /*70*/ MatGetRowMax_MPISBAIJ,
1438        0,
1439        0,
1440        0,
1441        0,
1442 /*75*/ 0,
1443        0,
1444        0,
1445        0,
1446        0,
1447 /*80*/ 0,
1448        0,
1449        0,
1450        0,
1451        MatLoad_MPISBAIJ,
1452 /*85*/ 0,
1453        0,
1454        0,
1455        0,
1456        0,
1457 /*90*/ 0,
1458        0,
1459        0,
1460        0,
1461        0,
1462 /*95*/ 0,
1463        0,
1464        0,
1465        0,
1466        0,
1467 /*100*/0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*105*/0,
1473        MatRealPart_MPISBAIJ,
1474        MatImaginaryPart_MPISBAIJ
1475 };
1476 
1477 
1478 EXTERN_C_BEGIN
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1481 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1482 {
1483   PetscFunctionBegin;
1484   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1485   *iscopy = PETSC_FALSE;
1486   PetscFunctionReturn(0);
1487 }
1488 EXTERN_C_END
1489 
1490 EXTERN_C_BEGIN
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1493 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1494 {
1495   Mat_MPISBAIJ   *b;
1496   PetscErrorCode ierr;
1497   PetscInt       i,mbs,Mbs;
1498 
1499   PetscFunctionBegin;
1500   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1501 
1502   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1503   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1504   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1505   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1506   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1507   if (d_nnz) {
1508     for (i=0; i<B->m/bs; i++) {
1509       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1510     }
1511   }
1512   if (o_nnz) {
1513     for (i=0; i<B->m/bs; i++) {
1514       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1515     }
1516   }
1517   B->preallocated = PETSC_TRUE;
1518   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1519   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1520   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1521   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1522 
1523   b   = (Mat_MPISBAIJ*)B->data;
1524   mbs = B->m/bs;
1525   Mbs = B->M/bs;
1526   if (mbs*bs != B->m) {
1527     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1528   }
1529 
1530   B->bs  = bs;
1531   b->bs2 = bs*bs;
1532   b->mbs = mbs;
1533   b->nbs = mbs;
1534   b->Mbs = Mbs;
1535   b->Nbs = Mbs;
1536 
1537   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1538   b->rowners[0]    = 0;
1539   for (i=2; i<=b->size; i++) {
1540     b->rowners[i] += b->rowners[i-1];
1541   }
1542   b->rstart    = b->rowners[b->rank];
1543   b->rend      = b->rowners[b->rank+1];
1544   b->cstart    = b->rstart;
1545   b->cend      = b->rend;
1546   for (i=0; i<=b->size; i++) {
1547     b->rowners_bs[i] = b->rowners[i]*bs;
1548   }
1549   b->rstart_bs = b->rstart*bs;
1550   b->rend_bs   = b->rend*bs;
1551 
1552   b->cstart_bs = b->cstart*bs;
1553   b->cend_bs   = b->cend*bs;
1554 
1555   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1556   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1557   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1558   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1559   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1560 
1561   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1562   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1563   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1564   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1565   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1566 
1567   /* build cache for off array entries formed */
1568   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1569 
1570   PetscFunctionReturn(0);
1571 }
1572 EXTERN_C_END
1573 
1574 /*MC
1575    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1576    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1577 
1578    Options Database Keys:
1579 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1580 
1581   Level: beginner
1582 
1583 .seealso: MatCreateMPISBAIJ
1584 M*/
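/*
   A minimal usage sketch (illustrative only; error checking abbreviated, and
   ierr, comm, M and bs are assumed to be declared elsewhere).  The type can
   equally be selected at runtime with -mat_type mpisbaij and MatSetFromOptions().

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/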
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatCreate_MPISBAIJ"
1589 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1590 {
1591   Mat_MPISBAIJ   *b;
1592   PetscErrorCode ierr;
1593   PetscTruth     flg;
1594 
1595   PetscFunctionBegin;
1596 
1597   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1598   B->data = (void*)b;
1599   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1600 
1601   B->ops->destroy    = MatDestroy_MPISBAIJ;
1602   B->ops->view       = MatView_MPISBAIJ;
1603   B->mapping    = 0;
1604   B->factor     = 0;
1605   B->assembled  = PETSC_FALSE;
1606 
1607   B->insertmode = NOT_SET_VALUES;
1608   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1609   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1610 
1611   /* build local table of row and column ownerships */
1612   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1613   b->cowners    = b->rowners + b->size + 2;
1614   b->rowners_bs = b->cowners + b->size + 2;
1615   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1616 
1617   /* build cache for off array entries formed */
1618   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1619   b->donotstash  = PETSC_FALSE;
1620   b->colmap      = PETSC_NULL;
1621   b->garray      = PETSC_NULL;
1622   b->roworiented = PETSC_TRUE;
1623 
1624 #if defined(PETSC_USE_MAT_SINGLE)
1625   /* stuff for MatSetValues_XXX in single precision */
1626   b->setvalueslen     = 0;
1627   b->setvaluescopy    = PETSC_NULL;
1628 #endif
1629 
1630   /* stuff used in block assembly */
1631   b->barray       = 0;
1632 
1633   /* stuff used for matrix vector multiply */
1634   b->lvec         = 0;
1635   b->Mvctx        = 0;
1636   b->slvec0       = 0;
1637   b->slvec0b      = 0;
1638   b->slvec1       = 0;
1639   b->slvec1a      = 0;
1640   b->slvec1b      = 0;
1641   b->sMvctx       = 0;
1642 
1643   /* stuff for MatGetRow() */
1644   b->rowindices   = 0;
1645   b->rowvalues    = 0;
1646   b->getrowactive = PETSC_FALSE;
1647 
1648   /* hash table stuff */
1649   b->ht           = 0;
1650   b->hd           = 0;
1651   b->ht_size      = 0;
1652   b->ht_flag      = PETSC_FALSE;
1653   b->ht_fact      = 0;
1654   b->ht_total_ct  = 0;
1655   b->ht_insert_ct = 0;
1656 
1657   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1658   if (flg) {
1659     PetscReal fact = 1.39;
1660     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1661     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1662     if (fact <= 1.0) fact = 1.39;
1663     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1664     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1665   }
1666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1667                                      "MatStoreValues_MPISBAIJ",
1668                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1670                                      "MatRetrieveValues_MPISBAIJ",
1671                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1673                                      "MatGetDiagonalBlock_MPISBAIJ",
1674                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1676                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1677                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1678   B->symmetric                  = PETSC_TRUE;
1679   B->structurally_symmetric     = PETSC_TRUE;
1680   B->symmetric_set              = PETSC_TRUE;
1681   B->structurally_symmetric_set = PETSC_TRUE;
1682   PetscFunctionReturn(0);
1683 }
1684 EXTERN_C_END
1685 
1686 /*MC
1687    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1688 
1689    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1690    and MATMPISBAIJ otherwise.
1691 
1692    Options Database Keys:
1693 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1694 
1695   Level: beginner
1696 
1697 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1698 M*/
1699 
1700 EXTERN_C_BEGIN
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatCreate_SBAIJ"
1703 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1704 {
1705   PetscErrorCode ierr;
1706   PetscMPIInt    size;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1710   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1711   if (size == 1) {
1712     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1713   } else {
1714     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 EXTERN_C_END
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1722 /*@C
1723    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1724    the user should preallocate the matrix storage by setting the parameters
1725    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1726    performance can be increased by more than a factor of 50.
1727 
1728    Collective on Mat
1729 
1730    Input Parameters:
1731 +  A - the matrix
1732 .  bs   - size of block
1733 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1734            submatrix  (same for all local rows)
1735 .  d_nnz - array containing the number of block nonzeros in the various block rows
1736            in the upper triangular and diagonal part of the diagonal portion of the
1737            local submatrix (possibly different for each block row) or PETSC_NULL.
1738            You must leave room for the diagonal entry even if it is zero.
1739 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1740            submatrix (same for all local rows).
1741 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1742            off-diagonal portion of the local submatrix (possibly different for
1743            each block row) or PETSC_NULL.
1744 
1745 
1746    Options Database Keys:
1747 +   -mat_no_unroll - uses code that does not unroll the loops in the
1748                      block calculations (much slower)
1749 -   -mat_block_size - size of the blocks to use
1750 
1751    Notes:
1752 
1753    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1754    then it must be used on all processors that share the object for that argument.
1755 
1756    If the *_nnz parameter is given then the *_nz parameter is ignored.
1757 
1758    Storage Information:
1759    For a square global matrix we define each processor's diagonal portion
1760    to be its local rows and the corresponding columns (a square submatrix);
1761    each processor's off-diagonal portion encompasses the remainder of the
1762    local matrix (a rectangular submatrix).
1763 
1764    The user can specify preallocated storage for the diagonal part of
1765    the local submatrix with either d_nz or d_nnz (not both).  Set
1766    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1767    memory allocation.  Likewise, specify preallocated storage for the
1768    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1769 
1770    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1771    the figure below we depict these three local rows and all columns (0-11).
1772 
1773 .vb
1774            0 1 2 3 4 5 6 7 8 9 10 11
1775           -------------------
1776    row 3  |  o o o d d d o o o o o o
1777    row 4  |  o o o d d d o o o o o o
1778    row 5  |  o o o d d d o o o o o o
1779           -------------------
1780 .ve
1781 
1782    Thus, any entries in the d locations are stored in the d (diagonal)
1783    submatrix, and any entries in the o locations are stored in the
1784    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1785    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1786 
1787    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1788    plus the diagonal part of the d matrix,
1789    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1790    In general, for PDE problems in which most nonzeros are near the diagonal,
1791    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1792    or you will get TERRIBLE performance; see the users' manual chapter on
1793    matrices.
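
   For example (illustrative): with bs=3 the three rows depicted above form a
   single block row.  Its diagonal block gives d_nz = 1 and, since only the
   upper triangle is stored, the two block columns to its right give o_nz = 2.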
1794 
1795    Level: intermediate
1796 
1797 .keywords: matrix, block, aij, compressed row, sparse, parallel
1798 
1799 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1800 @*/
1801 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1802 {
1803   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1804 
1805   PetscFunctionBegin;
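  /* the type-specific setup routine is composed on the matrix when its type is
     set; if B is not of type MPISBAIJ the query below returns PETSC_NULL and
     this call is silently a no-op */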
1806   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1807   if (f) {
1808     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatCreateMPISBAIJ"
1815 /*@C
1816    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1817    (block compressed row).  For good matrix assembly performance
1818    the user should preallocate the matrix storage by setting the parameters
1819    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1820    performance can be increased by more than a factor of 50.
1821 
1822    Collective on MPI_Comm
1823 
1824    Input Parameters:
1825 +  comm - MPI communicator
1826 .  bs   - size of block
1827 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1828            This value should be the same as the local size used in creating the
1829            y vector for the matrix-vector product y = Ax.
1830 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1831            This value should be the same as the local size used in creating the
1832            x vector for the matrix-vector product y = Ax.
1833 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1834 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1835 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1836            submatrix  (same for all local rows)
1837 .  d_nnz - array containing the number of block nonzeros in the various block rows
1838            in the upper triangular and diagonal part of the diagonal portion of the
1839            local submatrix (possibly different for each block row) or PETSC_NULL.
1840            You must leave room for the diagonal entry even if it is zero.
1841 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1842            submatrix (same for all local rows).
1843 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1844            off-diagonal portion of the local submatrix (possibly different for
1845            each block row) or PETSC_NULL.
1846 
1847    Output Parameter:
1848 .  A - the matrix
1849 
1850    Options Database Keys:
1851 +   -mat_no_unroll - uses code that does not unroll the loops in the
1852                      block calculations (much slower)
1853 .   -mat_block_size - size of the blocks to use
1854 -   -mat_mpi - use the parallel matrix data structures even on one processor
1855                (defaults to using SeqSBAIJ format on one processor)
1856 
1857    Notes:
1858    The number of rows and columns must be divisible by blocksize.
1859 
1860    The user MUST specify either the local or global matrix dimensions
1861    (possibly both).
1862 
1863    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1864    then it must be used on all processors that share the object for that argument.
1865 
1866    If the *_nnz parameter is given then the *_nz parameter is ignored.
1867 
1868    Storage Information:
1869    For a square global matrix we define each processor's diagonal portion
1870    to be its local rows and the corresponding columns (a square submatrix);
1871    each processor's off-diagonal portion encompasses the remainder of the
1872    local matrix (a rectangular submatrix).
1873 
1874    The user can specify preallocated storage for the diagonal part of
1875    the local submatrix with either d_nz or d_nnz (not both).  Set
1876    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1877    memory allocation.  Likewise, specify preallocated storage for the
1878    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1879 
1880    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1881    the figure below we depict these three local rows and all columns (0-11).
1882 
1883 .vb
1884            0 1 2 3 4 5 6 7 8 9 10 11
1885           -------------------
1886    row 3  |  o o o d d d o o o o o o
1887    row 4  |  o o o d d d o o o o o o
1888    row 5  |  o o o d d d o o o o o o
1889           -------------------
1890 .ve
1891 
1892    Thus, any entries in the d locations are stored in the d (diagonal)
1893    submatrix, and any entries in the o locations are stored in the
1894    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1895    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1896 
1897    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1898    plus the diagonal part of the d matrix,
1899    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1900    In general, for PDE problems in which most nonzeros are near the diagonal,
1901    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1902    or you will get TERRIBLE performance; see the users' manual chapter on
1903    matrices.
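
   Example (illustrative; error checking omitted and ierr assumed declared):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
                            100,100,3,PETSC_NULL,1,PETSC_NULL,&A);
.ve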
1904 
1905    Level: intermediate
1906 
1907 .keywords: matrix, block, aij, compressed row, sparse, parallel
1908 
1909 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1910 @*/
1911 
1912 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1913 {
1914   PetscErrorCode ierr;
1915   PetscMPIInt    size;
1916 
1917   PetscFunctionBegin;
1918   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1919   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1920   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1921   if (size > 1) {
1922     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1923     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1924   } else {
1925     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1926     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1934 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1935 {
1936   Mat            mat;
1937   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1938   PetscErrorCode ierr;
1939   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1940   PetscScalar    *array;
1941 
1942   PetscFunctionBegin;
1943   *newmat       = 0;
1944   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1945   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1946   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1947   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1948 
1949   mat->factor       = matin->factor;
1950   mat->preallocated = PETSC_TRUE;
1951   mat->assembled    = PETSC_TRUE;
1952   mat->insertmode   = NOT_SET_VALUES;
1953 
1954   a = (Mat_MPISBAIJ*)mat->data;
1955   mat->bs  = matin->bs;
1956   a->bs2   = oldmat->bs2;
1957   a->mbs   = oldmat->mbs;
1958   a->nbs   = oldmat->nbs;
1959   a->Mbs   = oldmat->Mbs;
1960   a->Nbs   = oldmat->Nbs;
1961 
1962   a->rstart       = oldmat->rstart;
1963   a->rend         = oldmat->rend;
1964   a->cstart       = oldmat->cstart;
1965   a->cend         = oldmat->cend;
1966   a->size         = oldmat->size;
1967   a->rank         = oldmat->rank;
1968   a->donotstash   = oldmat->donotstash;
1969   a->roworiented  = oldmat->roworiented;
1970   a->rowindices   = 0;
1971   a->rowvalues    = 0;
1972   a->getrowactive = PETSC_FALSE;
1973   a->barray       = 0;
1974   a->rstart_bs    = oldmat->rstart_bs;
1975   a->rend_bs      = oldmat->rend_bs;
1976   a->cstart_bs    = oldmat->cstart_bs;
1977   a->cend_bs      = oldmat->cend_bs;
1978 
1979   /* hash table stuff */
1980   a->ht           = 0;
1981   a->hd           = 0;
1982   a->ht_size      = 0;
1983   a->ht_flag      = oldmat->ht_flag;
1984   a->ht_fact      = oldmat->ht_fact;
1985   a->ht_total_ct  = 0;
1986   a->ht_insert_ct = 0;
1987 
1988   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1989   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1990   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1991   if (oldmat->colmap) {
1992 #if defined (PETSC_USE_CTABLE)
1993     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1994 #else
1995     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1996     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1997     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1998 #endif
1999   } else a->colmap = 0;
2000 
2001   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2002     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2003     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2004     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2005   } else a->garray = 0;
2006 
2007   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2008   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2009   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2010   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2011 
2012   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2013   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2014   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2015   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2016 
2017   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2018   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2019   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2020   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2021   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2022   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2023   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2024   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2025   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2026   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2027   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2028   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2029   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2030 
2031   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2032   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2033   a->sMvctx = oldmat->sMvctx;
2034   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2035 
2036   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2037   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2038   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2039   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2040   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy the composed-function list from the old matrix to the new one */
2041   *newmat = mat;
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #include "petscsys.h"
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatLoad_MPISBAIJ"
2049 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2050 {
2051   Mat            A;
2052   PetscErrorCode ierr;
2053   PetscInt       i,nz,j,rstart,rend;
2054   PetscScalar    *vals,*buf;
2055   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2056   MPI_Status     status;
2057   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2058   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2059   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2060   PetscInt       bs=1,Mbs,mbs,extra_rows;
2061   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2062   PetscInt       dcount,kmax,k,nzcount,tmp;
2063   int            fd;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2067 
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2070   if (!rank) {
2071     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2072     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2073     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2074     if (header[3] < 0) {
2075       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2076     }
2077   }
2078 
2079   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2080   M = header[1]; N = header[2];
2081 
2082   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2083 
2084   /*
2085      This code adds extra rows to make sure the number of rows is
2086      divisible by the blocksize
2087   */
2088   Mbs        = M/bs;
2089   extra_rows = bs - M + bs*(Mbs);
2090   if (extra_rows == bs) extra_rows = 0;
2091   else                  Mbs++;
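  /* e.g. M=10 with bs=3: Mbs=3 and extra_rows=2, so Mbs is bumped to 4 and the
     matrix is padded to 12 rows */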
2092   if (extra_rows && !rank) {
2093     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2094   }
2095 
2096   /* determine ownership of all rows */
2097   mbs        = Mbs/size + ((Mbs % size) > rank);
2098   m          = mbs*bs;
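  /* e.g. Mbs=10 block rows on size=3 processes gives mbs = 4,3,3 on ranks 0,1,2 */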
2099   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2100   browners   = rowners + size + 1;
2101   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2102   rowners[0] = 0;
2103   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2104   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2105   rstart = rowners[rank];
2106   rend   = rowners[rank+1];
2107 
2108   /* distribute row lengths to all processors */
2109   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2110   if (!rank) {
2111     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2112     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2113     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2114     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2115     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2116     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2117     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2118   } else {
2119     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2120   }
2121 
2122   if (!rank) {   /* procs[0] */
2123     /* calculate the number of nonzeros on each processor */
2124     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2125     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2126     for (i=0; i<size; i++) {
2127       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2128         procsnz[i] += rowlengths[j];
2129       }
2130     }
2131     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2132 
2133     /* determine max buffer needed and allocate it */
2134     maxnz = 0;
2135     for (i=0; i<size; i++) {
2136       maxnz = PetscMax(maxnz,procsnz[i]);
2137     }
2138     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2139 
2140     /* read in my part of the matrix column indices  */
2141     nz     = procsnz[0];
2142     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2143     mycols = ibuf;
2144     if (size == 1)  nz -= extra_rows;
2145     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2146     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2147 
2148     /* read in each other proc's column indices (except the last proc's) and ship them off */
2149     for (i=1; i<size-1; i++) {
2150       nz   = procsnz[i];
2151       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2152       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2153     }
2154     /* read in the stuff for the last proc */
2155     if (size != 1) {
2156       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2157       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2158       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2159       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2160     }
2161     ierr = PetscFree(cols);CHKERRQ(ierr);
2162   } else {  /* procs[i], i>0 */
2163     /* determine buffer space needed for message */
2164     nz = 0;
2165     for (i=0; i<m; i++) {
2166       nz += locrowlens[i];
2167     }
2168     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2169     mycols = ibuf;
2170     /* receive message of column indices*/
2171     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2172     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2173     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2174   }
2175 
2176   /* loop over local rows, determining number of off diagonal entries */
2177   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2178   odlens   = dlens + (rend-rstart);
2179   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2180   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2181   masked1  = mask    + Mbs;
2182   masked2  = masked1 + Mbs;
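  /* mask[] holds one flag per block column; masked1[]/masked2[] record which
     flags were set for the diagonal/off-diagonal portion so they can be
     cleared cheaply after each block row */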
2183   rowcount = 0; nzcount = 0;
2184   for (i=0; i<mbs; i++) {
2185     dcount  = 0;
2186     odcount = 0;
2187     for (j=0; j<bs; j++) {
2188       kmax = locrowlens[rowcount];
2189       for (k=0; k<kmax; k++) {
2190         tmp = mycols[nzcount++]/bs; /* block col. index */
2191         if (!mask[tmp]) {
2192           mask[tmp] = 1;
2193           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2194           else masked1[dcount++] = tmp; /* entry in diag portion */
2195         }
2196       }
2197       rowcount++;
2198     }
2199 
2200     dlens[i]  = dcount;  /* d_nzz[i] */
2201     odlens[i] = odcount; /* o_nzz[i] */
2202 
2203     /* zero out the mask elements we set */
2204     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2205     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2206   }
2207 
2208   /* create our matrix */
2209   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2210   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2211   ierr = MatSetType(A,type);CHKERRQ(ierr);
2212   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2213   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2214 
2215   if (!rank) {
2216     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2217     /* read in my part of the matrix numerical values  */
2218     nz = procsnz[0];
2219     vals = buf;
2220     mycols = ibuf;
2221     if (size == 1)  nz -= extra_rows;
2222     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2223     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2224 
2225     /* insert into matrix */
2226     jj      = rstart*bs;
2227     for (i=0; i<m; i++) {
2228       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2229       mycols += locrowlens[i];
2230       vals   += locrowlens[i];
2231       jj++;
2232     }
2233 
2234     /* read in other processors (except the last one) and ship out */
2235     for (i=1; i<size-1; i++) {
2236       nz   = procsnz[i];
2237       vals = buf;
2238       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2239       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2240     }
2241     /* the last proc */
2242     if (size != 1){
2243       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2244       vals = buf;
2245       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2246       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2247       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2248     }
2249     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2250 
2251   } else {
2252     /* receive numeric values */
2253     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2254 
2255     /* receive message of values*/
2256     vals   = buf;
2257     mycols = ibuf;
2258     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2259     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2260     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2261 
2262     /* insert into matrix */
2263     jj      = rstart*bs;
2264     for (i=0; i<m; i++) {
2265       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2266       mycols += locrowlens[i];
2267       vals   += locrowlens[i];
2268       jj++;
2269     }
2270   }
2271 
2272   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2273   ierr = PetscFree(buf);CHKERRQ(ierr);
2274   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2275   ierr = PetscFree(rowners);CHKERRQ(ierr);
2276   ierr = PetscFree(dlens);CHKERRQ(ierr);
2277   ierr = PetscFree(mask);CHKERRQ(ierr);
2278   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2279   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2280   *newmat = A;
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2286 /*XXXXX@
2287    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2288 
2289    Input Parameters:
2290 .  mat  - the matrix
2291 .  fact - factor
2292 
2293    Collective on Mat
2294 
2295    Level: advanced
2296 
2297   Notes:
2298    This can also be set by the command line option: -mat_use_hash_table fact
2299 
2300 .keywords: matrix, hashtable, factor, HT
2301 
2302 .seealso: MatSetOption()
2303 @XXXXX*/
2304 
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2308 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2309 {
2310   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2311   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2312   PetscReal      atmp;
2313   PetscReal      *work,*svalues,*rvalues;
2314   PetscErrorCode ierr;
2315   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2316   PetscMPIInt    rank,size;
2317   PetscInt       *rowners_bs,dest,count,source;
2318   PetscScalar    *va;
2319   MatScalar      *ba;
2320   MPI_Status     stat;
2321 
2322   PetscFunctionBegin;
2323   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2324   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2325 
2326   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2327   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2328 
2329   bs   = A->bs;
2330   mbs  = a->mbs;
2331   Mbs  = a->Mbs;
2332   ba   = b->a;
2333   bi   = b->i;
2334   bj   = b->j;
2335 
2336   /* find ownerships */
2337   rowners_bs = a->rowners_bs;
2338 
2339   /* each proc creates an array to be distributed */
2340   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2341   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2342 
2343   /* row_max for B */
2344   if (rank != size-1){
2345     for (i=0; i<mbs; i++) {
2346       ncols = bi[1] - bi[0]; bi++;
2347       brow  = bs*i;
2348       for (j=0; j<ncols; j++){
2349         bcol = bs*(*bj);
2350         for (kcol=0; kcol<bs; kcol++){
2351           col = bcol + kcol;                 /* local col index */
2352           col += rowners_bs[rank+1];      /* global col index */
2353           for (krow=0; krow<bs; krow++){
2354             atmp = PetscAbsScalar(*ba); ba++;
2355             row = brow + krow;    /* local row index */
2356             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2357             if (work[col] < atmp) work[col] = atmp;
2358           }
2359         }
2360         bj++;
2361       }
2362     }
2363 
2364     /* send values to its owners */
2365     for (dest=rank+1; dest<size; dest++){
2366       svalues = work + rowners_bs[dest];
2367       count   = rowners_bs[dest+1]-rowners_bs[dest];
2368       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2369     }
2370   }
2371 
2372   /* receive values */
2373   if (rank){
2374     rvalues = work;
2375     count   = rowners_bs[rank+1]-rowners_bs[rank];
2376     for (source=0; source<rank; source++){
2377       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2378       /* process values */
2379       for (i=0; i<count; i++){
2380         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2381       }
2382     }
2383   }
2384 
2385   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2386   ierr = PetscFree(work);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatRelax_MPISBAIJ"
2392 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2393 {
2394   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2395   PetscErrorCode ierr;
2396   PetscInt       mbs=mat->mbs,bs=matin->bs;
2397   PetscScalar    *x,*b,*ptr,zero=0.0;
2398   Vec            bb1;
2399 
2400   PetscFunctionBegin;
2401   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2402   if (bs > 1)
2403     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2404 
2405   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2406     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2407       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2408       its--;
2409     }
2410 
2411     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2412     while (its--){
2413 
2414       /* lower triangular part: slvec0b = B^T*xx (negated below along with slvec0a) */
2415       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2416 
2417       /* copy xx into slvec0a */
2418       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2419       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2420       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector arrays are PetscScalar, not MatScalar */
2421       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2422 
2423       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2424 
2425       /* copy bb into slvec1a */
2426       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2427       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2428       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2429       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2430 
2431       /* set slvec1b = 0 */
2432       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2433 
2434       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2435       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2436       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2437       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2438 
2439       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus all off-diagonal contributions */
2440       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2441 
2442       /* local diagonal sweep */
2443       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2444     }
2445     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2446   } else {
2447     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2454 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2455 {
2456   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2457   PetscErrorCode ierr;
2458   Vec            lvec1,bb1;
2459 
2460   PetscFunctionBegin;
2461   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2462   if (matin->bs > 1)
2463     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2464 
2465   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2466     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2467       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2468       its--;
2469     }
2470 
2471     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2472     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2473     while (its--){
2474       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2475 
2476       /* lower triangular part: bb1 = bb - B^T*xx */
2477       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2478       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2479 
2480       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2481       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2482       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2483 
2484       /* upper triangular part: bb1 = bb1 - B*x */
2485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2487 
2488       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2489 
2490       /* diagonal sweep */
2491       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2492     }
2493     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2494     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2495   } else {
2496     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2497   }
2498   PetscFunctionReturn(0);
2499 }
2500 
2501