xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 7e4bb74c6af8fbefd31a77f85fd215bd58beb805)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist as a separate function. Otherwise the ..._MatScalar() variants take matrix
27    elements already in single precision and insert them directly into the single precision
28    data structures, while MatSetValuesBlocked_MPISBAIJ() first converts the entries to
29    single precision and then calls ..._MatScalar() to store them.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
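/* Editor's note: a minimal sketch, not part of this file, of the conversion
   step the wrappers described above perform, assuming PETSC_USE_MAT_SINGLE is
   defined so that MatScalar is single precision while PetscScalar is double.
   The helper name is hypothetical. */
#if 0
static void CopyToMatScalar(const PetscScalar *v,MatScalar *vsingle,PetscInt N)
{
  PetscInt i;
  for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i]; /* narrowing conversion */
}
#endif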
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
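/* Editor's note: an illustrative sketch, not used by the code, of the lookup
   strategy the macro above implements: bisection narrows the range of sorted
   block-column indices to at most three entries, then a short linear scan finds
   the match or the insertion point. The function name is hypothetical. */
#if 0
static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {       /* coarse bisection */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {   /* finish with a short linear scan */
    if (rp[i] >= bcol) break;
  }
  return i;                    /* index of bcol if present, else insertion point */
}
#endif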
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
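/* Editor's note: a hedged usage sketch, not part of this file, illustrating the
   rule above for block size 1: the caller may pass both triangles and the lower
   one is silently dropped. */
#if 0
static PetscErrorCode ExampleUpperTriangleOnly(Mat mat) /* mat of type MATMPISBAIJ */
{
  PetscInt       row = 5,col = 3;
  PetscScalar    v   = 1.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr); /* (5,3): ignored */
  ierr = MatSetValues(mat,1,&col,1,&row,&v,ADD_VALUES);CHKERRQ(ierr); /* (3,5): stored  */
  PetscFunctionReturn(0);
}
#endif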
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if (!baij->donotstash) {
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if (!baij->donotstash) {
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if (!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
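/* Editor's note: an illustrative sketch, with hypothetical names, of the block
   copy performed above. For row-oriented input, the (i,j) block of the logically
   (m*bs) x (n*bs) dense array v starts at v + i*(stepval+bs)*bs + j*bs with
   stepval = (n-1)*bs, so consecutive rows of one block lie stepval entries apart. */
#if 0
static void CopyOneBlock(const MatScalar *value,MatScalar *barray,PetscInt bs,PetscInt stepval)
{
  PetscInt ii,jj;
  for (ii=0; ii<bs; ii++,value+=stepval) { /* skip to the next row of the block */
    for (jj=0; jj<bs; jj++) *barray++ = *value++;
  }
}
#endif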
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data--;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
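/* Editor's note on the Frobenius branch above: each process contributes the
   squares of its local norms, and the factor of 2 on sum[1] reflects that every
   entry stored in the off-diagonal part B stands for two symmetric entries of
   the global matrix, giving norm = sqrt(sum ||A||^2 + 2*sum ||B||^2). */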
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
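/* Editor's note: two communication phases are started above: mat->stash carries
   individual off-process entries (point indexed, hence rowners_bs), while
   mat->bstash carries whole bs x bs blocks (block indexed, hence rowners). */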
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        clear the roworiented flags before calling MatSetValues(), and restore
609        the original flags afterwards */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled; if so we must
639      also disassemble ourselves, in order that we may reassemble. */
640   /*
641      if the nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled, thus we can skip this step
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto the first processor */
717     Mat          A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ  *Bloc;
720     PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar    *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to make the call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* superdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
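/* Editor's note: a summary of the scheme above. With only the upper triangle
   stored, y = M*x is assembled as y = A*x + B*xghost + B^T*xlocal: the diagonal
   block product is placed in slvec1a, B^T*x in slvec0b, one scatter on the
   combined vectors slvec0/slvec1 both gathers the ghost values and accumulates
   the B^T contributions, and the final multadd with B completes the sum. */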
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do superdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* superdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do superdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   PetscFunctionBegin;
1020   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1029 
1030   PetscFunctionBegin;
1031   if (!baij->getrowactive) {
1032     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1033   }
1034   baij->getrowactive = PETSC_FALSE;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1041 {
1042   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1047   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1060   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1067 {
1068   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1073   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1082   Mat            A = a->A,B = a->B;
1083   PetscErrorCode ierr;
1084   PetscReal      isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = (PetscReal)matin->bs;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else {
1115     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1116   }
1117   info->rows_global       = (PetscReal)matin->M;
1118   info->columns_global    = (PetscReal)matin->N;
1119   info->rows_local        = (PetscReal)matin->m;
1120   info->columns_local     = (PetscReal)matin->N;
1121   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1122   info->fill_ratio_needed = 0;
1123   info->factor_mallocs    = 0;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1130 {
1131   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   switch (op) {
1136   case MAT_NO_NEW_NONZERO_LOCATIONS:
1137   case MAT_YES_NEW_NONZERO_LOCATIONS:
1138   case MAT_COLUMNS_UNSORTED:
1139   case MAT_COLUMNS_SORTED:
1140   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1141   case MAT_KEEP_ZEROED_ROWS:
1142   case MAT_NEW_NONZERO_LOCATION_ERR:
1143     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1144     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1145     break;
1146   case MAT_ROW_ORIENTED:
1147     a->roworiented = PETSC_TRUE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROWS_SORTED:
1152   case MAT_ROWS_UNSORTED:
1153   case MAT_YES_NEW_DIAGONALS:
1154     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1155     break;
1156   case MAT_COLUMN_ORIENTED:
1157     a->roworiented = PETSC_FALSE;
1158     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1159     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1160     break;
1161   case MAT_IGNORE_OFF_PROC_ENTRIES:
1162     a->donotstash = PETSC_TRUE;
1163     break;
1164   case MAT_NO_NEW_DIAGONALS:
1165     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1166   case MAT_USE_HASH_TABLE:
1167     a->ht_flag = PETSC_TRUE;
1168     break;
1169   case MAT_NOT_SYMMETRIC:
1170   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1171   case MAT_HERMITIAN:
1172     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1173   case MAT_SYMMETRIC:
1174   case MAT_STRUCTURALLY_SYMMETRIC:
1175   case MAT_NOT_HERMITIAN:
1176   case MAT_SYMMETRY_ETERNAL:
1177   case MAT_NOT_SYMMETRY_ETERNAL:
1178     break;
1179   default:
1180     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1188 {
1189   PetscErrorCode ierr;
1190   PetscFunctionBegin;
1191   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1198 {
1199   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1200   Mat            a=baij->A, b=baij->B;
1201   PetscErrorCode ierr;
1202   PetscInt       nv,m,n;
1203   PetscTruth     flg;
1204 
1205   PetscFunctionBegin;
1206   if (ll != rr) {
1207     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1208     if (!flg)
1209       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1210   }
1211   if (!ll) PetscFunctionReturn(0);
1212 
1213   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1214   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1215 
1216   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1217   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1218 
1219   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1220 
1221   /* left diagonalscale the off-diagonal part */
1222   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1223 
1224   /* scale the diagonal part */
1225   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1226 
1227   /* right diagonalscale the off-diagonal part */
1228   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1229   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
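/* Editor's note: the ll == rr restriction above preserves symmetry: for a
   diagonal matrix D, D*M*D is symmetric whenever M is, while a one-sided
   scaling L*M*R with L != R generally is not, so the result could no longer be
   represented in the upper-triangle-only SBAIJ storage. */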
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1236 {
1237   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1238   MPI_Comm          comm = A->comm;
1239   static PetscTruth called = PETSC_FALSE;
1240   PetscErrorCode    ierr;
1241 
1242   PetscFunctionBegin;
1243   if (!a->rank) {
1244     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1245   }
1246   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1247   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1248   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1255 {
1256   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatEqual_MPISBAIJ"
1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1269 {
1270   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1271   Mat            a,b,c,d;
1272   PetscTruth     flg;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   a = matA->A; b = matA->B;
1277   c = matB->A; d = matB->B;
1278 
1279   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1280   if (flg) {
1281     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1282   }
1283   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatCopy_MPISBAIJ"
1289 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1290 {
1291   PetscErrorCode ierr;
1292   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1293   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1294 
1295   PetscFunctionBegin;
1296   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1297   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1298     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1299   } else {
1300     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1301     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1308 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1309 {
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
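/* Editor's note: a hedged usage sketch, not part of this file, showing the call
   the default above forwards to, with illustrative values: block size 2, about
   3 nonzero blocks per block row in the diagonal part and 1 in the off-diagonal
   part. The function name is hypothetical. */
#if 0
static PetscErrorCode ExampleCreateMPISBAIJ(Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
#endif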
1316 
1317 #include "petscblaslapack.h"
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1320 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1321 {
1322   PetscErrorCode ierr;
1323   PetscInt       i;
1324   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1325   PetscBLASInt   bnz,one=1;
1326   Mat_SeqSBAIJ   *xa,*ya;
1327   Mat_SeqBAIJ    *xb,*yb;
1328 
1329   PetscFunctionBegin;
1330   if (str == SAME_NONZERO_PATTERN) {
1331     PetscScalar alpha = a;
1332     xa = (Mat_SeqSBAIJ *)xx->A->data;
1333     ya = (Mat_SeqSBAIJ *)yy->A->data;
1334     bnz = (PetscBLASInt)xa->nz;
1335     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1336     xb = (Mat_SeqBAIJ *)xx->B->data;
1337     yb = (Mat_SeqBAIJ *)yy->B->data;
1338     bnz = (PetscBLASInt)xb->nz;
1339     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1340   } else {
1341     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1342   }
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1348 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1349 {
1350   PetscErrorCode ierr;
1351   PetscInt       i;
1352   PetscTruth     flg;
1353 
1354   PetscFunctionBegin;
1355   for (i=0; i<n; i++) {
1356     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1357     if (!flg) {
1358       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1359     }
1360   }
1361   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 
1366 /* -------------------------------------------------------------------*/
1367 static struct _MatOps MatOps_Values = {
1368        MatSetValues_MPISBAIJ,
1369        MatGetRow_MPISBAIJ,
1370        MatRestoreRow_MPISBAIJ,
1371        MatMult_MPISBAIJ,
1372 /* 4*/ MatMultAdd_MPISBAIJ,
1373        MatMult_MPISBAIJ,       /* transpose versions are the same as non-transpose */
1374        MatMultAdd_MPISBAIJ,
1375        0,
1376        0,
1377        0,
1378 /*10*/ 0,
1379        0,
1380        0,
1381        MatRelax_MPISBAIJ,
1382        MatTranspose_MPISBAIJ,
1383 /*15*/ MatGetInfo_MPISBAIJ,
1384        MatEqual_MPISBAIJ,
1385        MatGetDiagonal_MPISBAIJ,
1386        MatDiagonalScale_MPISBAIJ,
1387        MatNorm_MPISBAIJ,
1388 /*20*/ MatAssemblyBegin_MPISBAIJ,
1389        MatAssemblyEnd_MPISBAIJ,
1390        0,
1391        MatSetOption_MPISBAIJ,
1392        MatZeroEntries_MPISBAIJ,
1393 /*25*/ 0,
1394        0,
1395        0,
1396        0,
1397        0,
1398 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1399        0,
1400        0,
1401        0,
1402        0,
1403 /*35*/ MatDuplicate_MPISBAIJ,
1404        0,
1405        0,
1406        0,
1407        0,
1408 /*40*/ MatAXPY_MPISBAIJ,
1409        MatGetSubMatrices_MPISBAIJ,
1410        MatIncreaseOverlap_MPISBAIJ,
1411        MatGetValues_MPISBAIJ,
1412        MatCopy_MPISBAIJ,
1413 /*45*/ MatPrintHelp_MPISBAIJ,
1414        MatScale_MPISBAIJ,
1415        0,
1416        0,
1417        0,
1418 /*50*/ 0,
1419        0,
1420        0,
1421        0,
1422        0,
1423 /*55*/ 0,
1424        0,
1425        MatSetUnfactored_MPISBAIJ,
1426        0,
1427        MatSetValuesBlocked_MPISBAIJ,
1428 /*60*/ 0,
1429        0,
1430        0,
1431        MatGetPetscMaps_Petsc,
1432        0,
1433 /*65*/ 0,
1434        0,
1435        0,
1436        0,
1437        0,
1438 /*70*/ MatGetRowMax_MPISBAIJ,
1439        0,
1440        0,
1441        0,
1442        0,
1443 /*75*/ 0,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*80*/ 0,
1449        0,
1450        0,
1451        0,
1452        MatLoad_MPISBAIJ,
1453 /*85*/ 0,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*90*/ 0,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*95*/ 0,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*100*/0,
1469        0,
1470        0,
1471        0,
1472        0,
1473 /*105*/0,
1474        MatRealPart_MPISBAIJ,
1475        MatImaginaryPart_MPISBAIJ
1476 };
1477 
1478 
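/* Returns a reference to the local diagonal block (the SeqSBAIJ matrix
   a->A), not a copy; *iscopy = PETSC_FALSE tells the caller it must not
   destroy the returned matrix. */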
1479 EXTERN_C_BEGIN
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1482 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1483 {
1484   PetscFunctionBegin;
1485   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1486   *iscopy = PETSC_FALSE;
1487   PetscFunctionReturn(0);
1488 }
1489 EXTERN_C_END
1490 
1491 EXTERN_C_BEGIN
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1494 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1495 {
1496   Mat_MPISBAIJ   *b;
1497   PetscErrorCode ierr;
1498   PetscInt       i,mbs,Mbs;
1499 
1500   PetscFunctionBegin;
1501   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1502 
1503   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1504   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1505   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1506   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1507   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1508   if (d_nnz) {
1509     for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1511     }
1512   }
1513   if (o_nnz) {
1514     for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1516     }
1517   }
1518   B->preallocated = PETSC_TRUE;
1519   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1520   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1521   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1522   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1523 
1524   b   = (Mat_MPISBAIJ*)B->data;
1525   mbs = B->m/bs;
1526   Mbs = B->M/bs;
1527   if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1529   }
1530 
1531   B->bs  = bs;
1532   b->bs2 = bs*bs;
1533   b->mbs = mbs;
1534   b->nbs = mbs;
1535   b->Mbs = Mbs;
1536   b->Nbs = Mbs;
1537 
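  /* gather every process's block-row count, then prefix-sum so that
     rowners[p] is the first global block row owned by process p and
     rowners[b->size] equals Mbs */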
1538   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1539   b->rowners[0]    = 0;
1540   for (i=2; i<=b->size; i++) {
1541     b->rowners[i] += b->rowners[i-1];
1542   }
1543   b->rstart    = b->rowners[b->rank];
1544   b->rend      = b->rowners[b->rank+1];
1545   b->cstart    = b->rstart;
1546   b->cend      = b->rend;
1547   for (i=0; i<=b->size; i++) {
1548     b->rowners_bs[i] = b->rowners[i]*bs;
1549   }
  b->rstart_bs = b->rstart*bs;
1551   b->rend_bs   = b->rend*bs;
1552 
1553   b->cstart_bs = b->cstart*bs;
1554   b->cend_bs   = b->cend*bs;
1555 
1556   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1557   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1558   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1559   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1560   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1561 
1562   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1563   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1564   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1565   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1566   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1567 
  /* build cache for off-processor entries formed */
1569   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1570 
1571   PetscFunctionReturn(0);
1572 }
1573 EXTERN_C_END
1574 
1575 /*MC
1576    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1577    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1578 
1579    Options Database Keys:
1580 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1581 
1582   Level: beginner
1583 
.seealso: MatCreateMPISBAIJ()
1585 M*/
1586 
1587 EXTERN_C_BEGIN
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatCreate_MPISBAIJ"
1590 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1591 {
1592   Mat_MPISBAIJ   *b;
1593   PetscErrorCode ierr;
1594   PetscTruth     flg;
1595 
1596   PetscFunctionBegin;
1597 
1598   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1599   B->data = (void*)b;
1600   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1601 
1602   B->ops->destroy    = MatDestroy_MPISBAIJ;
1603   B->ops->view       = MatView_MPISBAIJ;
1604   B->mapping    = 0;
1605   B->factor     = 0;
1606   B->assembled  = PETSC_FALSE;
1607 
1608   B->insertmode = NOT_SET_VALUES;
1609   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1610   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1611 
1612   /* build local table of row and column ownerships */
1613   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1614   b->cowners    = b->rowners + b->size + 2;
1615   b->rowners_bs = b->cowners + b->size + 2;
1616   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1617 
  /* build cache for off-processor entries formed */
1619   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1620   b->donotstash  = PETSC_FALSE;
1621   b->colmap      = PETSC_NULL;
1622   b->garray      = PETSC_NULL;
1623   b->roworiented = PETSC_TRUE;
1624 
1625 #if defined(PETSC_USE_MAT_SINGLE)
1626   /* stuff for MatSetValues_XXX in single precision */
1627   b->setvalueslen     = 0;
1628   b->setvaluescopy    = PETSC_NULL;
1629 #endif
1630 
1631   /* stuff used in block assembly */
1632   b->barray       = 0;
1633 
1634   /* stuff used for matrix vector multiply */
1635   b->lvec         = 0;
1636   b->Mvctx        = 0;
1637   b->slvec0       = 0;
1638   b->slvec0b      = 0;
1639   b->slvec1       = 0;
1640   b->slvec1a      = 0;
1641   b->slvec1b      = 0;
1642   b->sMvctx       = 0;
1643 
1644   /* stuff for MatGetRow() */
1645   b->rowindices   = 0;
1646   b->rowvalues    = 0;
1647   b->getrowactive = PETSC_FALSE;
1648 
1649   /* hash table stuff */
1650   b->ht           = 0;
1651   b->hd           = 0;
1652   b->ht_size      = 0;
1653   b->ht_flag      = PETSC_FALSE;
1654   b->ht_fact      = 0;
1655   b->ht_total_ct  = 0;
1656   b->ht_insert_ct = 0;
1657 
1658   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1659   if (flg) {
1660     PetscReal fact = 1.39;
1661     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1662     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1663     if (fact <= 1.0) fact = 1.39;
1664     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1665     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1666   }
1667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1668                                      "MatStoreValues_MPISBAIJ",
1669                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1671                                      "MatRetrieveValues_MPISBAIJ",
1672                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1674                                      "MatGetDiagonalBlock_MPISBAIJ",
1675                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1677                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1678                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1679   B->symmetric                  = PETSC_TRUE;
1680   B->structurally_symmetric     = PETSC_TRUE;
1681   B->symmetric_set              = PETSC_TRUE;
1682   B->structurally_symmetric_set = PETSC_TRUE;
1683   PetscFunctionReturn(0);
1684 }
1685 EXTERN_C_END
1686 
1687 /*MC
1688    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1689 
1690    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1691    and MATMPISBAIJ otherwise.
1692 
1693    Options Database Keys:
1694 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1695 
1696   Level: beginner
1697 
.seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1699 M*/
1700 
1701 EXTERN_C_BEGIN
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatCreate_SBAIJ"
1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1705 {
1706   PetscErrorCode ierr;
1707   PetscMPIInt    size;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1711   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1712   if (size == 1) {
1713     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1714   } else {
1715     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1723 /*@C
1724    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1725    the user should preallocate the matrix storage by setting the parameters
1726    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1727    performance can be increased by more than a factor of 50.
1728 
1729    Collective on Mat
1730 
1731    Input Parameters:
1732 +  A - the matrix
.  bs   - size of block
1734 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1735            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
           room for the diagonal entry even if it is zero.
1740 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1741            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
1743            off-diagonal portion of the local submatrix (possibly different for
1744            each block row) or PETSC_NULL.
1745 
1746 
1747    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1751 
1752    Notes:
1753 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1756 
1757    If the *_nnz parameter is given then the *_nz parameter is ignored
1758 
1759    Storage Information:
1760    For a square global matrix we define each processor's diagonal portion
1761    to be its local rows and the corresponding columns (a square submatrix);
1762    each processor's off-diagonal portion encompasses the remainder of the
1763    local matrix (a rectangular submatrix).
1764 
1765    The user can specify preallocated storage for the diagonal part of
1766    the local submatrix with either d_nz or d_nnz (not both).  Set
1767    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1768    memory allocation.  Likewise, specify preallocated storage for the
1769    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1770 
1771    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1772    the figure below we depict these three local rows and all columns (0-11).
1773 
1774 .vb
1775            0 1 2 3 4 5 6 7 8 9 10 11
1776           -------------------
1777    row 3  |  o o o d d d o o o o o o
1778    row 4  |  o o o d d d o o o o o o
1779    row 5  |  o o o d d d o o o o o o
1780           -------------------
1781 .ve
1782 
1783    Thus, any entries in the d locations are stored in the d (diagonal)
1784    submatrix, and any entries in the o locations are stored in the
1785    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1786    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1787 
1788    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1789    plus the diagonal part of the d matrix,
1790    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1791    In general, for PDE problems in which most nonzeros are near the diagonal,
1792    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1793    or you will get TERRIBLE performance; see the users' manual chapter on
1794    matrices.
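
   As a concrete illustration (bs=1 and the three rows depicted above; only
   the upper triangle is stored), one could preallocate with
.vb
   d_nnz = {3,2,1}   o_nnz = {6,6,6}
.ve
   or, more loosely, with d_nz=3 and o_nz=6.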
1795 
1796    Level: intermediate
1797 
1798 .keywords: matrix, block, aij, compressed row, sparse, parallel
1799 
1800 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1801 @*/
1802 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1803 {
1804   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1805 
1806   PetscFunctionBegin;
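  /* dispatch through the function composed onto B by its concrete type
     (see MatCreate_MPISBAIJ()); if B is not an MPISBAIJ matrix the query
     returns no function and this call is a harmless no-op */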
1807   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1808   if (f) {
1809     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatCreateMPISBAIJ"
1816 /*@C
1817    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1818    (block compressed row).  For good matrix assembly performance
1819    the user should preallocate the matrix storage by setting the parameters
1820    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1821    performance can be increased by more than a factor of 50.
1822 
1823    Collective on MPI_Comm
1824 
1825    Input Parameters:
1826 +  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1829            This value should be the same as the local size used in creating the
1830            y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1832            This value should be the same as the local size used in creating the
1833            x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
1841            You must leave room for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848    Output Parameter:
1849 .  A - the matrix
1850 
1851    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
1857 
1858    Notes:
1859    The number of rows and columns must be divisible by blocksize.
1860 
1861    The user MUST specify either the local or global matrix dimensions
1862    (possibly both).
1863 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1866 
1867    If the *_nnz parameter is given then the *_nz parameter is ignored
1868 
1869    Storage Information:
1870    For a square global matrix we define each processor's diagonal portion
1871    to be its local rows and the corresponding columns (a square submatrix);
1872    each processor's off-diagonal portion encompasses the remainder of the
1873    local matrix (a rectangular submatrix).
1874 
1875    The user can specify preallocated storage for the diagonal part of
1876    the local submatrix with either d_nz or d_nnz (not both).  Set
1877    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1878    memory allocation.  Likewise, specify preallocated storage for the
1879    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1880 
1881    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1882    the figure below we depict these three local rows and all columns (0-11).
1883 
1884 .vb
1885            0 1 2 3 4 5 6 7 8 9 10 11
1886           -------------------
1887    row 3  |  o o o d d d o o o o o o
1888    row 4  |  o o o d d d o o o o o o
1889    row 5  |  o o o d d d o o o o o o
1890           -------------------
1891 .ve
1892 
1893    Thus, any entries in the d locations are stored in the d (diagonal)
1894    submatrix, and any entries in the o locations are stored in the
1895    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1896    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1897 
1898    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1899    plus the diagonal part of the d matrix,
1900    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1901    In general, for PDE problems in which most nonzeros are near the diagonal,
1902    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1903    or you will get TERRIBLE performance; see the users' manual chapter on
1904    matrices.
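
   A minimal call sequence, letting PETSc derive the global sizes (d_nz and
   o_nz here are illustrative estimates, not defaults), might look like
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                            d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve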
1905 
1906    Level: intermediate
1907 
1908 .keywords: matrix, block, aij, compressed row, sparse, parallel
1909 
1910 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1911 @*/
1912 
1913 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1914 {
1915   PetscErrorCode ierr;
1916   PetscMPIInt    size;
1917 
1918   PetscFunctionBegin;
1919   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1920   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1922   if (size > 1) {
1923     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1924     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1925   } else {
1926     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1927     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1935 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1936 {
1937   Mat            mat;
1938   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1939   PetscErrorCode ierr;
1940   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1941   PetscScalar    *array;
1942 
1943   PetscFunctionBegin;
1944   *newmat       = 0;
1945   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1946   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1947   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1948   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1949 
1950   mat->factor       = matin->factor;
1951   mat->preallocated = PETSC_TRUE;
1952   mat->assembled    = PETSC_TRUE;
1953   mat->insertmode   = NOT_SET_VALUES;
1954 
1955   a = (Mat_MPISBAIJ*)mat->data;
1956   mat->bs  = matin->bs;
1957   a->bs2   = oldmat->bs2;
1958   a->mbs   = oldmat->mbs;
1959   a->nbs   = oldmat->nbs;
1960   a->Mbs   = oldmat->Mbs;
1961   a->Nbs   = oldmat->Nbs;
1962 
1963   a->rstart       = oldmat->rstart;
1964   a->rend         = oldmat->rend;
1965   a->cstart       = oldmat->cstart;
1966   a->cend         = oldmat->cend;
1967   a->size         = oldmat->size;
1968   a->rank         = oldmat->rank;
1969   a->donotstash   = oldmat->donotstash;
1970   a->roworiented  = oldmat->roworiented;
1971   a->rowindices   = 0;
1972   a->rowvalues    = 0;
1973   a->getrowactive = PETSC_FALSE;
1974   a->barray       = 0;
1975   a->rstart_bs    = oldmat->rstart_bs;
1976   a->rend_bs      = oldmat->rend_bs;
1977   a->cstart_bs    = oldmat->cstart_bs;
1978   a->cend_bs      = oldmat->cend_bs;
1979 
1980   /* hash table stuff */
1981   a->ht           = 0;
1982   a->hd           = 0;
1983   a->ht_size      = 0;
1984   a->ht_flag      = oldmat->ht_flag;
1985   a->ht_fact      = oldmat->ht_fact;
1986   a->ht_total_ct  = 0;
1987   a->ht_insert_ct = 0;
1988 
1989   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1990   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1991   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1992   if (oldmat->colmap) {
1993 #if defined (PETSC_USE_CTABLE)
1994     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1995 #else
1996     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1997     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1998     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1999 #endif
2000   } else a->colmap = 0;
2001 
2002   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2003     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2004     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2005     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2006   } else a->garray = 0;
2007 
2008   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2009   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2010   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2011   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2012 
2013   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2014   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2015   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2016   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2017 
2018   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2019   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2020   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2021   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2022   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2023   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2024   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2025   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2026   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2027   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2028   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2029   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2030   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2031 
2032   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2033   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2034   a->sMvctx = oldmat->sMvctx;
2035   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2036 
2037   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2038   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2039   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2040   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2042   *newmat = mat;
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #include "petscsys.h"
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatLoad_MPISBAIJ"
2050 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2051 {
2052   Mat            A;
2053   PetscErrorCode ierr;
2054   PetscInt       i,nz,j,rstart,rend;
2055   PetscScalar    *vals,*buf;
2056   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2057   MPI_Status     status;
2058   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2059   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2060   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2061   PetscInt       bs=1,Mbs,mbs,extra_rows;
2062   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2063   PetscInt       dcount,kmax,k,nzcount,tmp;
2064   int            fd;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2068 
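  /* layout of the PETSc binary matrix file as assumed by this loader: a
     4-int header {MAT_FILE_COOKIE, M, N, nz-or-flag}, then the row lengths,
     then all column indices, then all numerical values; header[3] < 0 marks
     a special format that cannot be loaded here */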
2069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2070   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2071   if (!rank) {
2072     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2073     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2075     if (header[3] < 0) {
2076       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2077     }
2078   }
2079 
2080   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2081   M = header[1]; N = header[2];
2082 
2083   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2084 
2085   /*
2086      This code adds extra rows to make sure the number of rows is
2087      divisible by the blocksize
2088   */
2089   Mbs        = M/bs;
2090   extra_rows = bs - M + bs*(Mbs);
2091   if (extra_rows == bs) extra_rows = 0;
2092   else                  Mbs++;
  if (extra_rows && !rank) {
2094     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2095   }
2096 
2097   /* determine ownership of all rows */
2098   mbs        = Mbs/size + ((Mbs % size) > rank);
2099   m          = mbs*bs;
2100   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2101   browners   = rowners + size + 1;
2102   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2103   rowners[0] = 0;
2104   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2105   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2106   rstart = rowners[rank];
2107   rend   = rowners[rank+1];
2108 
2109   /* distribute row lengths to all processors */
2110   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2111   if (!rank) {
2112     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2113     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2114     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2115     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2116     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2117     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2118     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2119   } else {
2120     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2121   }
2122 
2123   if (!rank) {   /* procs[0] */
2124     /* calculate the number of nonzeros on each processor */
2125     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2126     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2127     for (i=0; i<size; i++) {
2128       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2129         procsnz[i] += rowlengths[j];
2130       }
2131     }
2132     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2133 
2134     /* determine max buffer needed and allocate it */
2135     maxnz = 0;
2136     for (i=0; i<size; i++) {
2137       maxnz = PetscMax(maxnz,procsnz[i]);
2138     }
2139     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2140 
2141     /* read in my part of the matrix column indices  */
2142     nz     = procsnz[0];
2143     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2144     mycols = ibuf;
2145     if (size == 1)  nz -= extra_rows;
2146     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2147     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2148 
    /* read in the column indices for the other procs (except the last) and ship them off */
2150     for (i=1; i<size-1; i++) {
2151       nz   = procsnz[i];
2152       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2153       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2154     }
2155     /* read in the stuff for the last proc */
2156     if (size != 1) {
2157       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2158       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2159       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2160       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2161     }
2162     ierr = PetscFree(cols);CHKERRQ(ierr);
2163   } else {  /* procs[i], i>0 */
2164     /* determine buffer space needed for message */
2165     nz = 0;
2166     for (i=0; i<m; i++) {
2167       nz += locrowlens[i];
2168     }
2169     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2170     mycols = ibuf;
    /* receive message of column indices */
2172     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2173     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2174     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2175   }
2176 
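  /* Counting scheme: mask[] flags block columns already seen in the current
     block row, while masked1[]/masked2[] remember which diagonal/off-diagonal
     flags to clear before the next row; the result is the d_nnz/o_nnz arrays
     needed for preallocation. */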
2177   /* loop over local rows, determining number of off diagonal entries */
2178   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2179   odlens   = dlens + (rend-rstart);
2180   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2181   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2182   masked1  = mask    + Mbs;
2183   masked2  = masked1 + Mbs;
2184   rowcount = 0; nzcount = 0;
2185   for (i=0; i<mbs; i++) {
2186     dcount  = 0;
2187     odcount = 0;
2188     for (j=0; j<bs; j++) {
2189       kmax = locrowlens[rowcount];
2190       for (k=0; k<kmax; k++) {
2191         tmp = mycols[nzcount++]/bs; /* block col. index */
2192         if (!mask[tmp]) {
2193           mask[tmp] = 1;
2194           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2195           else masked1[dcount++] = tmp; /* entry in diag portion */
2196         }
2197       }
2198       rowcount++;
2199     }
2200 
2201     dlens[i]  = dcount;  /* d_nzz[i] */
2202     odlens[i] = odcount; /* o_nzz[i] */
2203 
2204     /* zero out the mask elements we set */
2205     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2206     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2207   }
2208 
2209   /* create our matrix */
2210   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2211   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2212   ierr = MatSetType(A,type);CHKERRQ(ierr);
2213   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2214   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2215 
2216   if (!rank) {
2217     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2218     /* read in my part of the matrix numerical values  */
2219     nz = procsnz[0];
2220     vals = buf;
2221     mycols = ibuf;
2222     if (size == 1)  nz -= extra_rows;
2223     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2224     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2225 
2226     /* insert into matrix */
2227     jj      = rstart*bs;
2228     for (i=0; i<m; i++) {
2229       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2230       mycols += locrowlens[i];
2231       vals   += locrowlens[i];
2232       jj++;
2233     }
2234 
    /* read in the values for the other procs (except the last one) and ship them out */
2236     for (i=1; i<size-1; i++) {
2237       nz   = procsnz[i];
2238       vals = buf;
2239       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2240       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2241     }
2242     /* the last proc */
2243     if (size != 1){
      nz   = procsnz[size-1] - extra_rows;
2245       vals = buf;
2246       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2247       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2248       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2249     }
2250     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2251 
2252   } else {
2253     /* receive numeric values */
2254     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2255 
    /* receive message of values */
2257     vals   = buf;
2258     mycols = ibuf;
2259     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2260     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2261     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2262 
2263     /* insert into matrix */
2264     jj      = rstart*bs;
2265     for (i=0; i<m; i++) {
2266       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2267       mycols += locrowlens[i];
2268       vals   += locrowlens[i];
2269       jj++;
2270     }
2271   }
2272 
2273   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2274   ierr = PetscFree(buf);CHKERRQ(ierr);
2275   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2276   ierr = PetscFree(rowners);CHKERRQ(ierr);
2277   ierr = PetscFree(dlens);CHKERRQ(ierr);
2278   ierr = PetscFree(mask);CHKERRQ(ierr);
2279   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2280   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2281   *newmat = A;
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2287 /*XXXXX@
2288    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2289 
2290    Input Parameters:
2291 .  mat  - the matrix
2292 .  fact - factor
2293 
2294    Collective on Mat
2295 
2296    Level: advanced
2297 
2298   Notes:
2299    This can also be set by the command line option: -mat_use_hash_table fact
2300 
2301 .keywords: matrix, hashtable, factor, HT
2302 
2303 .seealso: MatSetOption()
2304 @XXXXX*/
2305 
2306 
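/* Row-maxima sketch: only the upper triangle is stored, so the maximum over
   a full row also needs entries held by lower-rank processes as columns of
   their B part.  Each process therefore records column maxima of its B block
   in work[] and ships the relevant segments to the owning (higher-rank)
   processes, which fold them into the local result. */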
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2309 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2310 {
2311   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2312   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2313   PetscReal      atmp;
2314   PetscReal      *work,*svalues,*rvalues;
2315   PetscErrorCode ierr;
2316   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2317   PetscMPIInt    rank,size;
2318   PetscInt       *rowners_bs,dest,count,source;
2319   PetscScalar    *va;
2320   MatScalar      *ba;
2321   MPI_Status     stat;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2326 
2327   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2328   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2329 
2330   bs   = A->bs;
2331   mbs  = a->mbs;
2332   Mbs  = a->Mbs;
2333   ba   = b->a;
2334   bi   = b->i;
2335   bj   = b->j;
2336 
2337   /* find ownerships */
2338   rowners_bs = a->rowners_bs;
2339 
2340   /* each proc creates an array to be distributed */
2341   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2342   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2343 
2344   /* row_max for B */
2345   if (rank != size-1){
2346     for (i=0; i<mbs; i++) {
2347       ncols = bi[1] - bi[0]; bi++;
2348       brow  = bs*i;
2349       for (j=0; j<ncols; j++){
2350         bcol = bs*(*bj);
2351         for (kcol=0; kcol<bs; kcol++){
2352           col = bcol + kcol;                 /* local col index */
2353           col += rowners_bs[rank+1];      /* global col index */
2354           for (krow=0; krow<bs; krow++){
2355             atmp = PetscAbsScalar(*ba); ba++;
2356             row = brow + krow;    /* local row index */
2357             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2358             if (work[col] < atmp) work[col] = atmp;
2359           }
2360         }
2361         bj++;
2362       }
2363     }
2364 
2365     /* send values to its owners */
2366     for (dest=rank+1; dest<size; dest++){
2367       svalues = work + rowners_bs[dest];
2368       count   = rowners_bs[dest+1]-rowners_bs[dest];
2369       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2370     }
2371   }
2372 
2373   /* receive values */
2374   if (rank){
2375     rvalues = work;
2376     count   = rowners_bs[rank+1]-rowners_bs[rank];
2377     for (source=0; source<rank; source++){
2378       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2379       /* process values */
2380       for (i=0; i<count; i++){
2381         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2382       }
2383     }
2384   }
2385 
2386   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2387   ierr = PetscFree(work);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
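/* Parallel SSOR sketch (a descriptive summary of the loop below): with only
   the upper triangle stored, the off-process contribution to the residual
   combines B^T*xx (lower triangle, shipped through the slvec0/slvec1
   scatter) and B*xx (upper triangle), so each outer iteration forms
       bb1 = bb - B^T*xx - B*xx
   and then runs a local symmetric sweep with the diagonal block A. */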
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatRelax_MPISBAIJ"
2393 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2394 {
2395   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2396   PetscErrorCode ierr;
2397   PetscInt       mbs=mat->mbs,bs=matin->bs;
2398   PetscScalar    *x,*b,*ptr,zero=0.0;
2399   Vec            bb1;
2400 
2401   PetscFunctionBegin;
2402   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2403   if (bs > 1)
2404     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2405 
2406   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2407     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2408       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2409       its--;
2410     }
2411 
2412     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2413     while (its--){
2414 
2415       /* lower triangular part: slvec0b = - B^T*xx */
2416       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2417 
2418       /* copy xx into slvec0a */
2419       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2420       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2422       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2423 
2424       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2425 
2426       /* copy bb into slvec1a */
2427       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2428       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2430       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2431 
2432       /* set slvec1b = 0 */
2433       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2434 
2435       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2436       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2437       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2438       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2439 
2440       /* upper triangular part: bb1 = bb1 - B*x */
2441       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2442 
2443       /* local diagonal sweep */
2444       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2445     }
2446     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2447   } else {
2448     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2455 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2456 {
2457   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2458   PetscErrorCode ierr;
2459   Vec            lvec1,bb1;
2460 
2461   PetscFunctionBegin;
2462   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2463   if (matin->bs > 1)
2464     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2465 
2466   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2467     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2468       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2469       its--;
2470     }
2471 
2472     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2473     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2474     while (its--){
2475       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2476 
2477       /* lower diagonal part: bb1 = bb - B^T*xx */
2478       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2479       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2480 
2481       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2482       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2483       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2484 
2485       /* upper diagonal part: bb1 = bb1 - B*x */
2486       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2487       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2488 
2489       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2490 
2491       /* diagonal sweep */
2492       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2493     }
2494     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2495     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2496   } else {
2497     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2498   }
2499   PetscFunctionReturn(0);
2500 }
2501 
2502