xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 046da0a13659bf6557c8549a2fea89880b2ef309)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
122   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
123   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
124   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
125   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
126   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
127 
128   if (a->inew){
129     ierr = PetscFree(a->inew);CHKERRQ(ierr);
130     a->inew = 0;
131   }
132   ierr = PetscFree(a);CHKERRQ(ierr);
133 
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
146 {
147   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
148 
149   PetscFunctionBegin;
150   switch (op) {
151   case MAT_ROW_ORIENTED:
152     a->roworiented    = PETSC_TRUE;
153     break;
154   case MAT_COLUMN_ORIENTED:
155     a->roworiented    = PETSC_FALSE;
156     break;
157   case MAT_COLUMNS_SORTED:
158     a->sorted         = PETSC_TRUE;
159     break;
160   case MAT_COLUMNS_UNSORTED:
161     a->sorted         = PETSC_FALSE;
162     break;
163   case MAT_KEEP_ZEROED_ROWS:
164     a->keepzeroedrows = PETSC_TRUE;
165     break;
166   case MAT_NO_NEW_NONZERO_LOCATIONS:
167     a->nonew          = 1;
168     break;
169   case MAT_NEW_NONZERO_LOCATION_ERR:
170     a->nonew          = -1;
171     break;
172   case MAT_NEW_NONZERO_ALLOCATION_ERR:
173     a->nonew          = -2;
174     break;
175   case MAT_YES_NEW_NONZERO_LOCATIONS:
176     a->nonew          = 0;
177     break;
178   case MAT_ROWS_SORTED:
179   case MAT_ROWS_UNSORTED:
180   case MAT_YES_NEW_DIAGONALS:
181   case MAT_IGNORE_OFF_PROC_ENTRIES:
182   case MAT_USE_HASH_TABLE:
183     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
184     break;
185   case MAT_NO_NEW_DIAGONALS:
186     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
187   case MAT_NOT_SYMMETRIC:
188   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
189   case MAT_HERMITIAN:
190     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
191   case MAT_SYMMETRIC:
192   case MAT_STRUCTURALLY_SYMMETRIC:
193   case MAT_NOT_HERMITIAN:
194   case MAT_SYMMETRY_ETERNAL:
195   case MAT_NOT_SYMMETRY_ETERNAL:
196     break;
197   default:
198     SETERRQ(PETSC_ERR_SUP,"unknown option");
199   }
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
205 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
206 {
207   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
208   PetscErrorCode ierr;
209   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
210   MatScalar    *aa,*aa_i;
211   PetscScalar  *v_i;
212 
213   PetscFunctionBegin;
214   bs  = a->bs;
215   ai  = a->i;
216   aj  = a->j;
217   aa  = a->a;
218   bs2 = a->bs2;
219 
220   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
221 
222   bn  = row/bs;   /* Block number */
223   bp  = row % bs; /* Block position */
224   M   = ai[bn+1] - ai[bn];
225   *ncols = bs*M;
226 
227   if (v) {
228     *v = 0;
229     if (*ncols) {
230       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
231       for (i=0; i<M; i++) { /* for each block in the block row */
232         v_i  = *v + i*bs;
233         aa_i = aa + bs2*(ai[bn] + i);
234         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
235       }
236     }
237   }
238 
239   if (cols) {
240     *cols = 0;
241     if (*ncols) {
242       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
243       for (i=0; i<M; i++) { /* for each block in the block row */
244         cols_i = *cols + i*bs;
245         itmp  = bs*aj[ai[bn] + i];
246         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
247       }
248     }
249   }
250 
251   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
252   /* this segment is currently removed, so only entries in the upper triangle are obtained */
253 #ifdef column_search
254   v_i    = *v    + M*bs;
255   cols_i = *cols + M*bs;
256   for (i=0; i<bn; i++){ /* for each block row */
257     M = ai[i+1] - ai[i];
258     for (j=0; j<M; j++){
259       itmp = aj[ai[i] + j];    /* block column value */
260       if (itmp == bn){
261         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
262         for (k=0; k<bs; k++) {
263           *cols_i++ = i*bs+k;
264           *v_i++    = aa_i[k];
265         }
266         *ncols += bs;
267         break;
268       }
269     }
270   }
271 #endif
272 
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
278 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
279 {
280   PetscErrorCode ierr;
281 
282   PetscFunctionBegin;
283   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
284   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
290 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
291 {
292   PetscErrorCode ierr;
293   PetscFunctionBegin;
294   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
300 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
301 {
302   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
303   PetscErrorCode ierr;
304   int i,j,bs = a->bs,k,l,bs2=a->bs2;
305   char              *name;
306   PetscViewerFormat format;
307 
308   PetscFunctionBegin;
309   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
310   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
311   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
312     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
313   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
314     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
315   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
316     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
317     for (i=0; i<a->mbs; i++) {
318       for (j=0; j<bs; j++) {
319         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
320         for (k=a->i[i]; k<a->i[i+1]; k++) {
321           for (l=0; l<bs; l++) {
322 #if defined(PETSC_USE_COMPLEX)
323             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
324               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
325                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
326             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
327               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
328                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
329             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
330               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
331             }
332 #else
333             if (a->a[bs2*k + l*bs + j] != 0.0) {
334               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
335             }
336 #endif
337           }
338         }
339         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
340       }
341     }
342     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
343   } else {
344     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
345     for (i=0; i<a->mbs; i++) {
346       for (j=0; j<bs; j++) {
347         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
348         for (k=a->i[i]; k<a->i[i+1]; k++) {
349           for (l=0; l<bs; l++) {
350 #if defined(PETSC_USE_COMPLEX)
351             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
352               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
353                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
354             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
355               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
356                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
357             } else {
358               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
359             }
360 #else
361             ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
362 #endif
363           }
364         }
365         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
366       }
367     }
368     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
369   }
370   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
374 #undef __FUNCT__
375 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
376 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
377 {
378   Mat          A = (Mat) Aa;
379   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
380   PetscErrorCode ierr;
381   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
382   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
383   MatScalar    *aa;
384   MPI_Comm     comm;
385   PetscViewer  viewer;
386 
387   PetscFunctionBegin;
388   /*
389       This is nasty. If this is called from an originally parallel matrix
390    then all processes call this,but only the first has the matrix so the
391    rest should return immediately.
392   */
393   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
394   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
395   if (rank) PetscFunctionReturn(0);
396 
397   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
398 
399   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
400   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
401 
402   /* loop over matrix elements drawing boxes */
403   color = PETSC_DRAW_BLUE;
404   for (i=0,row=0; i<mbs; i++,row+=bs) {
405     for (j=a->i[i]; j<a->i[i+1]; j++) {
406       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
407       x_l = a->j[j]*bs; x_r = x_l + 1.0;
408       aa = a->a + j*bs2;
409       for (k=0; k<bs; k++) {
410         for (l=0; l<bs; l++) {
411           if (PetscRealPart(*aa++) >=  0.) continue;
412           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
413         }
414       }
415     }
416   }
417   color = PETSC_DRAW_CYAN;
418   for (i=0,row=0; i<mbs; i++,row+=bs) {
419     for (j=a->i[i]; j<a->i[i+1]; j++) {
420       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
421       x_l = a->j[j]*bs; x_r = x_l + 1.0;
422       aa = a->a + j*bs2;
423       for (k=0; k<bs; k++) {
424         for (l=0; l<bs; l++) {
425           if (PetscRealPart(*aa++) != 0.) continue;
426           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
427         }
428       }
429     }
430   }
431 
432   color = PETSC_DRAW_RED;
433   for (i=0,row=0; i<mbs; i++,row+=bs) {
434     for (j=a->i[i]; j<a->i[i+1]; j++) {
435       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
436       x_l = a->j[j]*bs; x_r = x_l + 1.0;
437       aa = a->a + j*bs2;
438       for (k=0; k<bs; k++) {
439         for (l=0; l<bs; l++) {
440           if (PetscRealPart(*aa++) <= 0.) continue;
441           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
442         }
443       }
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
451 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
452 {
453   PetscErrorCode ierr;
454   PetscReal    xl,yl,xr,yr,w,h;
455   PetscDraw    draw;
456   PetscTruth   isnull;
457 
458   PetscFunctionBegin;
459 
460   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
461   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
462 
463   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
464   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
465   xr += w;    yr += h;  xl = -w;     yl = -h;
466   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
467   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
468   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatView_SeqSBAIJ"
474 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
475 {
476   PetscErrorCode ierr;
477   PetscTruth iascii,isdraw;
478 
479   PetscFunctionBegin;
480   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
481   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
482   if (iascii){
483     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
484   } else if (isdraw) {
485     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
486   } else {
487     Mat B;
488     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
489     ierr = MatView(B,viewer);CHKERRQ(ierr);
490     ierr = MatDestroy(B);CHKERRQ(ierr);
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
498 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
499 {
500   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
501   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
502   int        *ai = a->i,*ailen = a->ilen;
503   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
504   MatScalar  *ap,*aa = a->a,zero = 0.0;
505 
506   PetscFunctionBegin;
507   for (k=0; k<m; k++) { /* loop over rows */
508     row  = im[k]; brow = row/bs;
509     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
510     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
511     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
512     nrow = ailen[brow];
513     for (l=0; l<n; l++) { /* loop over columns */
514       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
515       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
516       col  = in[l] ;
517       bcol = col/bs;
518       cidx = col%bs;
519       ridx = row%bs;
520       high = nrow;
521       low  = 0; /* assume unsorted */
522       while (high-low > 5) {
523         t = (low+high)/2;
524         if (rp[t] > bcol) high = t;
525         else             low  = t;
526       }
527       for (i=low; i<high; i++) {
528         if (rp[i] > bcol) break;
529         if (rp[i] == bcol) {
530           *v++ = ap[bs2*i+bs*cidx+ridx];
531           goto finished;
532         }
533       }
534       *v++ = zero;
535       finished:;
536     }
537   }
538   PetscFunctionReturn(0);
539 }
540 
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
544 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
545 {
546   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
547   PetscErrorCode ierr;
548   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
549   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
550   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
551   PetscTruth      roworiented=a->roworiented;
552   const MatScalar *value = v;
553   MatScalar       *ap,*aa = a->a,*bap;
554 
555   PetscFunctionBegin;
556   if (roworiented) {
557     stepval = (n-1)*bs;
558   } else {
559     stepval = (m-1)*bs;
560   }
561   for (k=0; k<m; k++) { /* loop over added rows */
562     row  = im[k];
563     if (row < 0) continue;
564 #if defined(PETSC_USE_BOPT_g)
565     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
566 #endif
567     rp   = aj + ai[row];
568     ap   = aa + bs2*ai[row];
569     rmax = imax[row];
570     nrow = ailen[row];
571     low  = 0;
572     for (l=0; l<n; l++) { /* loop over added columns */
573       if (in[l] < 0) continue;
574       col = in[l];
575 #if defined(PETSC_USE_BOPT_g)
576       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
577 #endif
578       if (col < row) continue; /* ignore lower triangular block */
579       if (roworiented) {
580         value = v + k*(stepval+bs)*bs + l*bs;
581       } else {
582         value = v + l*(stepval+bs)*bs + k*bs;
583       }
584       if (!sorted) low = 0; high = nrow;
585       while (high-low > 7) {
586         t = (low+high)/2;
587         if (rp[t] > col) high = t;
588         else             low  = t;
589       }
590       for (i=low; i<high; i++) {
591         if (rp[i] > col) break;
592         if (rp[i] == col) {
593           bap  = ap +  bs2*i;
594           if (roworiented) {
595             if (is == ADD_VALUES) {
596               for (ii=0; ii<bs; ii++,value+=stepval) {
597                 for (jj=ii; jj<bs2; jj+=bs) {
598                   bap[jj] += *value++;
599                 }
600               }
601             } else {
602               for (ii=0; ii<bs; ii++,value+=stepval) {
603                 for (jj=ii; jj<bs2; jj+=bs) {
604                   bap[jj] = *value++;
605                 }
606               }
607             }
608           } else {
609             if (is == ADD_VALUES) {
610               for (ii=0; ii<bs; ii++,value+=stepval) {
611                 for (jj=0; jj<bs; jj++) {
612                   *bap++ += *value++;
613                 }
614               }
615             } else {
616               for (ii=0; ii<bs; ii++,value+=stepval) {
617                 for (jj=0; jj<bs; jj++) {
618                   *bap++  = *value++;
619                 }
620               }
621             }
622           }
623           goto noinsert2;
624         }
625       }
626       if (nonew == 1) goto noinsert2;
627       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
628       if (nrow >= rmax) {
629         /* there is no extra room in row, therefore enlarge */
630         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
631         MatScalar *new_a;
632 
633         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
634 
635         /* malloc new storage space */
636         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
637 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
638         new_j   = (int*)(new_a + bs2*new_nz);
639         new_i   = new_j + new_nz;
640 
641         /* copy over old data into new slots */
642         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
643         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
644         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
645         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
646         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
647         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
648         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
649         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
650         /* free up old matrix storage */
651         ierr = PetscFree(a->a);CHKERRQ(ierr);
652         if (!a->singlemalloc) {
653           ierr = PetscFree(a->i);CHKERRQ(ierr);
654           ierr = PetscFree(a->j);CHKERRQ(ierr);
655         }
656         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
657         a->singlemalloc = PETSC_TRUE;
658 
659         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
660         rmax = imax[row] = imax[row] + CHUNKSIZE;
661         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
662         a->maxnz += bs2*CHUNKSIZE;
663         a->reallocs++;
664         a->nz++;
665       }
666       N = nrow++ - 1;
667       /* shift up all the later entries in this row */
668       for (ii=N; ii>=i; ii--) {
669         rp[ii+1] = rp[ii];
670         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
671       }
672       if (N >= i) {
673         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
674       }
675       rp[i] = col;
676       bap   = ap +  bs2*i;
677       if (roworiented) {
678         for (ii=0; ii<bs; ii++,value+=stepval) {
679           for (jj=ii; jj<bs2; jj+=bs) {
680             bap[jj] = *value++;
681           }
682         }
683       } else {
684         for (ii=0; ii<bs; ii++,value+=stepval) {
685           for (jj=0; jj<bs; jj++) {
686             *bap++  = *value++;
687           }
688         }
689       }
690       noinsert2:;
691       low = i;
692     }
693     ailen[row] = nrow;
694   }
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
700 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
701 {
702   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
703   PetscErrorCode ierr;
704   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
705   int        m = A->m,*ip,N,*ailen = a->ilen;
706   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
707   MatScalar  *aa = a->a,*ap;
708 
709   PetscFunctionBegin;
710   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
711 
712   if (m) rmax = ailen[0];
713   for (i=1; i<mbs; i++) {
714     /* move each row back by the amount of empty slots (fshift) before it*/
715     fshift += imax[i-1] - ailen[i-1];
716     rmax   = PetscMax(rmax,ailen[i]);
717     if (fshift) {
718       ip = aj + ai[i]; ap = aa + bs2*ai[i];
719       N = ailen[i];
720       for (j=0; j<N; j++) {
721         ip[j-fshift] = ip[j];
722         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724     }
725     ai[i] = ai[i-1] + ailen[i-1];
726   }
727   if (mbs) {
728     fshift += imax[mbs-1] - ailen[mbs-1];
729     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
730   }
731   /* reset ilen and imax for each row */
732   for (i=0; i<mbs; i++) {
733     ailen[i] = imax[i] = ai[i+1] - ai[i];
734   }
735   a->nz = ai[mbs];
736 
737   /* diagonals may have moved, reset it */
738   if (a->diag) {
739     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
740   }
741   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
742            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
743   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",
744            a->reallocs);
745   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
746   a->reallocs          = 0;
747   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
748 
749   PetscFunctionReturn(0);
750 }
751 
752 /*
753    This function returns an array of flags which indicate the locations of contiguous
754    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
755    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
756    Assume: sizes should be long enough to hold all the values.
757 */
758 #undef __FUNCT__
759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
761 {
762   int        i,j,k,row;
763   PetscTruth flg;
764 
765   PetscFunctionBegin;
766   for (i=0,j=0; i<n; j++) {
767     row = idx[i];
768     if (row%bs!=0) { /* Not the begining of a block */
769       sizes[j] = 1;
770       i++;
771     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
772       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
773       i++;
774     } else { /* Begining of the block, so check if the complete block exists */
775       flg = PETSC_TRUE;
776       for (k=1; k<bs; k++) {
777         if (row+k != idx[i+k]) { /* break in the block */
778           flg = PETSC_FALSE;
779           break;
780         }
781       }
782       if (flg == PETSC_TRUE) { /* No break in the bs */
783         sizes[j] = bs;
784         i+= bs;
785       } else {
786         sizes[j] = 1;
787         i++;
788       }
789     }
790   }
791   *bs_max = j;
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
797 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
798 {
799   PetscFunctionBegin;
800   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
801 }
802 
803 /* Only add/insert a(i,j) with i<=j (blocks).
804    Any a(i,j) with i>j input by user is ingored.
805 */
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
809 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
810 {
811   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
812   PetscErrorCode ierr;
813   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
814   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
815   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
816   int         ridx,cidx,bs2=a->bs2;
817   MatScalar   *ap,value,*aa=a->a,*bap;
818 
819   PetscFunctionBegin;
820 
821   for (k=0; k<m; k++) { /* loop over added rows */
822     row  = im[k];       /* row number */
823     brow = row/bs;      /* block row number */
824     if (row < 0) continue;
825 #if defined(PETSC_USE_BOPT_g)
826     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
827 #endif
828     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
829     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
830     rmax = imax[brow];  /* maximum space allocated for this row */
831     nrow = ailen[brow]; /* actual length of this row */
832     low  = 0;
833 
834     for (l=0; l<n; l++) { /* loop over added columns */
835       if (in[l] < 0) continue;
836 #if defined(PETSC_USE_BOPT_g)
837       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
838 #endif
839       col = in[l];
840       bcol = col/bs;              /* block col number */
841 
842       if (brow <= bcol){
843         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
844         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
845           /* element value a(k,l) */
846           if (roworiented) {
847             value = v[l + k*n];
848           } else {
849             value = v[k + l*m];
850           }
851 
852           /* move pointer bap to a(k,l) quickly and add/insert value */
853           if (!sorted) low = 0; high = nrow;
854           while (high-low > 7) {
855             t = (low+high)/2;
856             if (rp[t] > bcol) high = t;
857             else              low  = t;
858           }
859           for (i=low; i<high; i++) {
860             /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */
861             if (rp[i] > bcol) break;
862             if (rp[i] == bcol) {
863               bap  = ap +  bs2*i + bs*cidx + ridx;
864               if (is == ADD_VALUES) *bap += value;
865               else                  *bap  = value;
866               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
867               if (brow == bcol && ridx < cidx){
868                 bap  = ap +  bs2*i + bs*ridx + cidx;
869                 if (is == ADD_VALUES) *bap += value;
870                 else                  *bap  = value;
871               }
872               goto noinsert1;
873             }
874           }
875 
876       if (nonew == 1) goto noinsert1;
877       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
878       if (nrow >= rmax) {
879         /* there is no extra room in row, therefore enlarge */
880         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
881         MatScalar *new_a;
882 
883         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
884 
885         /* Malloc new storage space */
886         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
887         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
888         new_j = (int*)(new_a + bs2*new_nz);
889         new_i = new_j + new_nz;
890 
891         /* copy over old data into new slots */
892         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
893         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
894         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
895         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
896         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
897         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
898         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
899         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
900         /* free up old matrix storage */
901         ierr = PetscFree(a->a);CHKERRQ(ierr);
902         if (!a->singlemalloc) {
903           ierr = PetscFree(a->i);CHKERRQ(ierr);
904           ierr = PetscFree(a->j);CHKERRQ(ierr);
905         }
906         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
907         a->singlemalloc = PETSC_TRUE;
908 
909         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
910         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
911         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
912         a->maxnz += bs2*CHUNKSIZE;
913         a->reallocs++;
914         a->nz++;
915       }
916 
917       N = nrow++ - 1;
918       /* shift up all the later entries in this row */
919       for (ii=N; ii>=i; ii--) {
920         rp[ii+1] = rp[ii];
921         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
922       }
923       if (N>=i) {
924         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
925       }
926       rp[i]                      = bcol;
927       ap[bs2*i + bs*cidx + ridx] = value;
928       noinsert1:;
929       low = i;
930       /* } */
931         }
932       } /* end of if .. if..  */
933     }                     /* end of loop over added columns */
934     ailen[brow] = nrow;
935   }                       /* end of loop over added rows */
936 
937   PetscFunctionReturn(0);
938 }
939 
940 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
941 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
942 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
943 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
944 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
945 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
946 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
947 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
948 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
949 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
950 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
951 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
952 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
953 
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
962 
963 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
964 
965 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
966 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
967 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
968 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
973 
974 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
975 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
976 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
977 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
982 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
983 
984 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
985 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
986 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
987 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
992 
993 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
994 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
995 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
996 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1001 
1002 #ifdef HAVE_MatICCFactor
1003 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1006 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1007 {
1008   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1009   Mat         outA;
1010   PetscErrorCode ierr;
1011   PetscTruth  row_identity,col_identity;
1012 
1013   PetscFunctionBegin;
1014   outA          = inA;
1015   inA->factor   = FACTOR_CHOLESKY;
1016 
1017   if (!a->diag) {
1018     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1019   }
1020   /*
1021       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1022       for ILU(0) factorization with natural ordering
1023   */
1024   switch (a->bs) {
1025   case 1:
1026     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1027     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1028     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1029     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1030   case 2:
1031     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1032     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1033     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1034     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1035     break;
1036   case 3:
1037     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1038     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1039     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1040     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1041     break;
1042   case 4:
1043     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1044     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1045     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1046     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1047     break;
1048   case 5:
1049     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1050     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1051     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1052     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1053     break;
1054   case 6:
1055     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1056     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1057     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1058     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1059     break;
1060   case 7:
1061     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1062     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1064     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1065     break;
1066   default:
1067     a->row        = row;
1068     a->icol       = col;
1069     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1070     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1071 
1072     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1073     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1074     PetscLogObjectParent(inA,a->icol);
1075 
1076     if (!a->solve_work) {
1077       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1078       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1079     }
1080   }
1081 
1082   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1083 
1084   PetscFunctionReturn(0);
1085 }
1086 #endif
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1090 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1091 {
1092   static PetscTruth called = PETSC_FALSE;
1093   MPI_Comm          comm = A->comm;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1098   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1099   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 EXTERN_C_BEGIN
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1106 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1107 {
1108   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1109   int         i,nz,n;
1110 
1111   PetscFunctionBegin;
1112   nz = baij->maxnz;
1113   n  = mat->n;
1114   for (i=0; i<nz; i++) {
1115     baij->j[i] = indices[i];
1116   }
1117   baij->nz = nz;
1118   for (i=0; i<n; i++) {
1119     baij->ilen[i] = baij->imax[i];
1120   }
1121 
1122   PetscFunctionReturn(0);
1123 }
1124 EXTERN_C_END
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1128 /*@
1129     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1130        in the matrix.
1131 
1132   Input Parameters:
1133 +  mat     - the SeqSBAIJ matrix
1134 -  indices - the column indices
1135 
1136   Level: advanced
1137 
1138   Notes:
1139     This can be called if you have precomputed the nonzero structure of the
1140   matrix and want to provide it to the matrix object to improve the performance
1141   of the MatSetValues() operation.
1142 
1143     You MUST have set the correct numbers of nonzeros per row in the call to
1144   MatCreateSeqSBAIJ().
1145 
1146     MUST be called before any calls to MatSetValues()
1147 
1148 .seealso: MatCreateSeqSBAIJ
1149 @*/
1150 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1151 {
1152   PetscErrorCode ierr,(*f)(Mat,int *);
1153 
1154   PetscFunctionBegin;
1155   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1156   PetscValidPointer(indices,2);
1157   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1158   if (f) {
1159     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1160   } else {
1161     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1162   }
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1168 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1169 {
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1179 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1180 {
1181   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1182   PetscFunctionBegin;
1183   *array = a->a;
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1189 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1190 {
1191   PetscFunctionBegin;
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #include "petscblaslapack.h"
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1198 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1199 {
1200   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1201   PetscErrorCode ierr;
1202   int            i,bs=y->bs,bs2,j;
1203   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1204 
1205   PetscFunctionBegin;
1206   if (str == SAME_NONZERO_PATTERN) {
1207     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1208   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1209     if (y->xtoy && y->XtoY != X) {
1210       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1211       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1212     }
1213     if (!y->xtoy) { /* get xtoy */
1214       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1215       y->XtoY = X;
1216     }
1217     bs2 = bs*bs;
1218     for (i=0; i<x->nz; i++) {
1219       j = 0;
1220       while (j < bs2){
1221         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1222         j++;
1223       }
1224     }
1225     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1226   } else {
1227     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1228   }
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1234 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1235 {
1236   PetscFunctionBegin;
1237   *flg = PETSC_TRUE;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1243 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1244 {
1245   PetscFunctionBegin;
1246   *flg = PETSC_TRUE;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1252 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1253 {
1254   PetscFunctionBegin;
1255   *flg = PETSC_FALSE;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 /* -------------------------------------------------------------------*/
1260 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1261        MatGetRow_SeqSBAIJ,
1262        MatRestoreRow_SeqSBAIJ,
1263        MatMult_SeqSBAIJ_N,
1264 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1265        MatMult_SeqSBAIJ_N,
1266        MatMultAdd_SeqSBAIJ_N,
1267        MatSolve_SeqSBAIJ_N,
1268        0,
1269        0,
1270 /*10*/ 0,
1271        0,
1272        MatCholeskyFactor_SeqSBAIJ,
1273        MatRelax_SeqSBAIJ,
1274        MatTranspose_SeqSBAIJ,
1275 /*15*/ MatGetInfo_SeqSBAIJ,
1276        MatEqual_SeqSBAIJ,
1277        MatGetDiagonal_SeqSBAIJ,
1278        MatDiagonalScale_SeqSBAIJ,
1279        MatNorm_SeqSBAIJ,
1280 /*20*/ 0,
1281        MatAssemblyEnd_SeqSBAIJ,
1282        0,
1283        MatSetOption_SeqSBAIJ,
1284        MatZeroEntries_SeqSBAIJ,
1285 /*25*/ MatZeroRows_SeqSBAIJ,
1286        0,
1287        0,
1288        MatCholeskyFactorSymbolic_SeqSBAIJ,
1289        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1290 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1291        0,
1292        MatICCFactorSymbolic_SeqSBAIJ,
1293        MatGetArray_SeqSBAIJ,
1294        MatRestoreArray_SeqSBAIJ,
1295 /*35*/ MatDuplicate_SeqSBAIJ,
1296        0,
1297        0,
1298        0,
1299        0,
1300 /*40*/ MatAXPY_SeqSBAIJ,
1301        MatGetSubMatrices_SeqSBAIJ,
1302        MatIncreaseOverlap_SeqSBAIJ,
1303        MatGetValues_SeqSBAIJ,
1304        0,
1305 /*45*/ MatPrintHelp_SeqSBAIJ,
1306        MatScale_SeqSBAIJ,
1307        0,
1308        0,
1309        0,
1310 /*50*/ MatGetBlockSize_SeqSBAIJ,
1311        MatGetRowIJ_SeqSBAIJ,
1312        MatRestoreRowIJ_SeqSBAIJ,
1313        0,
1314        0,
1315 /*55*/ 0,
1316        0,
1317        0,
1318        0,
1319        MatSetValuesBlocked_SeqSBAIJ,
1320 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1321        0,
1322        0,
1323        MatGetPetscMaps_Petsc,
1324        0,
1325 /*65*/ 0,
1326        0,
1327        0,
1328        0,
1329        0,
1330 /*70*/ MatGetRowMax_SeqSBAIJ,
1331        0,
1332        0,
1333        0,
1334        0,
1335 /*75*/ 0,
1336        0,
1337        0,
1338        0,
1339        0,
1340 /*80*/ 0,
1341        0,
1342        0,
1343 #if !defined(PETSC_USE_COMPLEX)
1344        MatGetInertia_SeqSBAIJ,
1345 #else
1346        0,
1347 #endif
1348        MatLoad_SeqSBAIJ,
1349 /*85*/ MatIsSymmetric_SeqSBAIJ,
1350        MatIsHermitian_SeqSBAIJ,
1351        MatIsStructurallySymmetric_SeqSBAIJ,
1352        0,
1353        0,
1354 /*90*/ 0,
1355        0,
1356        0,
1357        0,
1358        0,
1359 /*95*/ 0,
1360        0,
1361        0,
1362        0};
1363 
1364 EXTERN_C_BEGIN
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1367 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1368 {
1369   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1370   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   if (aij->nonew != 1) {
1375     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1376   }
1377 
1378   /* allocate space for values if not already there */
1379   if (!aij->saved_values) {
1380     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1381   }
1382 
1383   /* copy values over */
1384   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1385   PetscFunctionReturn(0);
1386 }
1387 EXTERN_C_END
1388 
1389 EXTERN_C_BEGIN
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1392 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1393 {
1394   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1395   PetscErrorCode ierr;
1396   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1397 
1398   PetscFunctionBegin;
1399   if (aij->nonew != 1) {
1400     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1401   }
1402   if (!aij->saved_values) {
1403     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1404   }
1405 
1406   /* copy values over */
1407   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 EXTERN_C_END
1411 
1412 EXTERN_C_BEGIN
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1415 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1416 {
1417   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1418   PetscErrorCode ierr;
1419   int          i,len,mbs,bs2;
1420   PetscTruth   flg;
1421 
1422   PetscFunctionBegin;
1423   B->preallocated = PETSC_TRUE;
1424   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1425   mbs  = B->m/bs;
1426   bs2  = bs*bs;
1427 
1428   if (mbs*bs != B->m) {
1429     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1430   }
1431 
1432   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1433   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1434   if (nnz) {
1435     for (i=0; i<mbs; i++) {
1436       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1437       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1438     }
1439   }
1440 
1441   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1442   if (!flg) {
1443     switch (bs) {
1444     case 1:
1445       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1446       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1447       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1448       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1449       B->ops->mult            = MatMult_SeqSBAIJ_1;
1450       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1451       break;
1452     case 2:
1453       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1454       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1455       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1456       B->ops->mult            = MatMult_SeqSBAIJ_2;
1457       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1458       break;
1459     case 3:
1460       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1461       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1462       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1463       B->ops->mult            = MatMult_SeqSBAIJ_3;
1464       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1465       break;
1466     case 4:
1467       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1468       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1469       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1470       B->ops->mult            = MatMult_SeqSBAIJ_4;
1471       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1472       break;
1473     case 5:
1474       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1475       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1476       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1477       B->ops->mult            = MatMult_SeqSBAIJ_5;
1478       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1479       break;
1480     case 6:
1481       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1482       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1483       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1484       B->ops->mult            = MatMult_SeqSBAIJ_6;
1485       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1486       break;
1487     case 7:
1488       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1489       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1490       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1491       B->ops->mult            = MatMult_SeqSBAIJ_7;
1492       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1493       break;
1494     }
1495   }
1496 
1497   b->mbs = mbs;
1498   b->nbs = mbs;
1499   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1500   if (!nnz) {
1501     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1502     else if (nz <= 0)        nz = 1;
1503     for (i=0; i<mbs; i++) {
1504       b->imax[i] = nz;
1505     }
1506     nz = nz*mbs; /* total nz */
1507   } else {
1508     nz = 0;
1509     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1510   }
1511   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1512 
1513   /* allocate the matrix space */
1514   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1515   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1516   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1517   b->j = (int*)(b->a + nz*bs2);
1518   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1519   b->i = b->j + nz;
1520   b->singlemalloc = PETSC_TRUE;
1521 
1522   /* pointer to beginning of each row */
1523   b->i[0] = 0;
1524   for (i=1; i<mbs+1; i++) {
1525     b->i[i] = b->i[i-1] + b->imax[i-1];
1526   }
1527 
1528   /* b->ilen will count nonzeros in each block row so far. */
1529   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1530   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1531   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1532 
1533   b->bs               = bs;
1534   b->bs2              = bs2;
1535   b->nz             = 0;
1536   b->maxnz          = nz*bs2;
1537 
1538   b->inew             = 0;
1539   b->jnew             = 0;
1540   b->anew             = 0;
1541   b->a2anew           = 0;
1542   b->permute          = PETSC_FALSE;
1543   PetscFunctionReturn(0);
1544 }
1545 EXTERN_C_END
1546 
1547 EXTERN_C_BEGIN
1548 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1549 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1550 EXTERN_C_END
1551 
1552 /*MC
1553    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1554    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1555 
1556    Options Database Keys:
1557 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1558 
1559   Level: beginner
1560 
1561 .seealso: MatCreateSeqSBAIJ
1562 M*/
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1567 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1568 {
1569   Mat_SeqSBAIJ *b;
1570   PetscErrorCode ierr;
1571   int size;
1572 
1573   PetscFunctionBegin;
1574   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1575   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1576   B->m = B->M = PetscMax(B->m,B->M);
1577   B->n = B->N = PetscMax(B->n,B->N);
1578 
1579   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1580   B->data = (void*)b;
1581   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1582   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1583   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1584   B->ops->view        = MatView_SeqSBAIJ;
1585   B->factor           = 0;
1586   B->lupivotthreshold = 1.0;
1587   B->mapping          = 0;
1588   b->row              = 0;
1589   b->icol             = 0;
1590   b->reallocs         = 0;
1591   b->saved_values     = 0;
1592 
1593   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1594   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1595 
1596   b->sorted           = PETSC_FALSE;
1597   b->roworiented      = PETSC_TRUE;
1598   b->nonew            = 0;
1599   b->diag             = 0;
1600   b->solve_work       = 0;
1601   b->mult_work        = 0;
1602   B->spptr            = 0;
1603   b->keepzeroedrows   = PETSC_FALSE;
1604   b->xtoy             = 0;
1605   b->XtoY             = 0;
1606 
1607   b->inew             = 0;
1608   b->jnew             = 0;
1609   b->anew             = 0;
1610   b->a2anew           = 0;
1611   b->permute          = PETSC_FALSE;
1612 
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1614                                      "MatStoreValues_SeqSBAIJ",
1615                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1617                                      "MatRetrieveValues_SeqSBAIJ",
1618                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1620                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1621                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1623                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1624                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1626                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1627                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1629                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1630                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1631 
1632   B->symmetric                  = PETSC_TRUE;
1633   B->structurally_symmetric     = PETSC_TRUE;
1634   B->symmetric_set              = PETSC_TRUE;
1635   B->structurally_symmetric_set = PETSC_TRUE;
1636   PetscFunctionReturn(0);
1637 }
1638 EXTERN_C_END
1639 
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1642 /*@C
1643    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1644    compressed row) format.  For good matrix assembly performance the
1645    user should preallocate the matrix storage by setting the parameter nz
1646    (or the array nnz).  By setting these parameters accurately, performance
1647    during matrix assembly can be increased by more than a factor of 50.
1648 
1649    Collective on Mat
1650 
1651    Input Parameters:
1652 +  A - the symmetric matrix
1653 .  bs - size of block
1654 .  nz - number of block nonzeros per block row (same for all rows)
1655 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1656          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1657 
1658    Options Database Keys:
1659 .   -mat_no_unroll - uses code that does not unroll the loops in the
1660                      block calculations (much slower)
1661 .    -mat_block_size - size of the blocks to use
1662 
1663    Level: intermediate
1664 
1665    Notes:
1666    Specify the preallocated storage with either nz or nnz (not both).
1667    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1668    allocation.  For additional details, see the users manual chapter on
1669    matrices.
1670 
1671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1672 @*/
1673 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1674   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1675 
1676   PetscFunctionBegin;
1677   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1678   if (f) {
1679     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1680   }
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatCreateSeqSBAIJ"
1686 /*@C
1687    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1688    compressed row) format.  For good matrix assembly performance the
1689    user should preallocate the matrix storage by setting the parameter nz
1690    (or the array nnz).  By setting these parameters accurately, performance
1691    during matrix assembly can be increased by more than a factor of 50.
1692 
1693    Collective on MPI_Comm
1694 
1695    Input Parameters:
1696 +  comm - MPI communicator, set to PETSC_COMM_SELF
1697 .  bs - size of block
1698 .  m - number of rows, or number of columns
1699 .  nz - number of block nonzeros per block row (same for all rows)
1700 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1701          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1702 
1703    Output Parameter:
1704 .  A - the symmetric matrix
1705 
1706    Options Database Keys:
1707 .   -mat_no_unroll - uses code that does not unroll the loops in the
1708                      block calculations (much slower)
1709 .    -mat_block_size - size of the blocks to use
1710 
1711    Level: intermediate
1712 
1713    Notes:
1714 
1715    Specify the preallocated storage with either nz or nnz (not both).
1716    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1717    allocation.  For additional details, see the users manual chapter on
1718    matrices.
1719 
1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1721 @*/
1722 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1723 {
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1728   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1729   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1735 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1736 {
1737   Mat          C;
1738   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1739   PetscErrorCode ierr;
1740   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1741 
1742   PetscFunctionBegin;
1743   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1744 
1745   *B = 0;
1746   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1747   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1748   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1749   c    = (Mat_SeqSBAIJ*)C->data;
1750 
1751   C->preallocated   = PETSC_TRUE;
1752   C->factor         = A->factor;
1753   c->row            = 0;
1754   c->icol           = 0;
1755   c->saved_values   = 0;
1756   c->keepzeroedrows = a->keepzeroedrows;
1757   C->assembled      = PETSC_TRUE;
1758 
1759   C->M    = A->M;
1760   C->N    = A->N;
1761   c->bs   = a->bs;
1762   c->bs2  = a->bs2;
1763   c->mbs  = a->mbs;
1764   c->nbs  = a->nbs;
1765 
1766   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1767   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1768   for (i=0; i<mbs; i++) {
1769     c->imax[i] = a->imax[i];
1770     c->ilen[i] = a->ilen[i];
1771   }
1772 
1773   /* allocate the matrix space */
1774   c->singlemalloc = PETSC_TRUE;
1775   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1776   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1777   c->j = (int*)(c->a + nz*bs2);
1778   c->i = c->j + nz;
1779   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1780   if (mbs > 0) {
1781     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1782     if (cpvalues == MAT_COPY_VALUES) {
1783       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1784     } else {
1785       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     }
1787   }
1788 
1789   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1790   c->sorted      = a->sorted;
1791   c->roworiented = a->roworiented;
1792   c->nonew       = a->nonew;
1793 
1794   if (a->diag) {
1795     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1796     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1797     for (i=0; i<mbs; i++) {
1798       c->diag[i] = a->diag[i];
1799     }
1800   } else c->diag        = 0;
1801   c->nz               = a->nz;
1802   c->maxnz            = a->maxnz;
1803   c->solve_work         = 0;
1804   c->mult_work          = 0;
1805   *B = C;
1806   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1812 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1813 {
1814   Mat_SeqSBAIJ *a;
1815   Mat          B;
1816   PetscErrorCode ierr;
1817   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1818   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1819   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1820   int          *masked,nmask,tmp,bs2,ishift;
1821   PetscScalar  *aa;
1822   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1823 
1824   PetscFunctionBegin;
1825   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1826   bs2  = bs*bs;
1827 
1828   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1829   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1830   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1831   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1832   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1833   M = header[1]; N = header[2]; nz = header[3];
1834 
1835   if (header[3] < 0) {
1836     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1837   }
1838 
1839   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1840 
1841   /*
1842      This code adds extra rows to make sure the number of rows is
1843     divisible by the blocksize
1844   */
1845   mbs        = M/bs;
1846   extra_rows = bs - M + bs*(mbs);
1847   if (extra_rows == bs) extra_rows = 0;
1848   else                  mbs++;
1849   if (extra_rows) {
1850     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1851   }
1852 
1853   /* read in row lengths */
1854   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1855   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1856   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1857 
1858   /* read in column indices */
1859   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1860   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1861   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1862 
1863   /* loop over row lengths determining block row lengths */
1864   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1865   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1866   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1867   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1868   masked   = mask + mbs;
1869   rowcount = 0; nzcount = 0;
1870   for (i=0; i<mbs; i++) {
1871     nmask = 0;
1872     for (j=0; j<bs; j++) {
1873       kmax = rowlengths[rowcount];
1874       for (k=0; k<kmax; k++) {
1875         tmp = jj[nzcount++]/bs;   /* block col. index */
1876         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1877       }
1878       rowcount++;
1879     }
1880     s_browlengths[i] += nmask;
1881 
1882     /* zero out the mask elements we set */
1883     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1884   }
1885 
1886   /* create our matrix */
1887   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1888   ierr = MatSetType(B,type);CHKERRQ(ierr);
1889   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1890   a = (Mat_SeqSBAIJ*)B->data;
1891 
1892   /* set matrix "i" values */
1893   a->i[0] = 0;
1894   for (i=1; i<= mbs; i++) {
1895     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1896     a->ilen[i-1] = s_browlengths[i-1];
1897   }
1898   a->nz = a->i[mbs];
1899 
1900   /* read in nonzero values */
1901   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1902   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1903   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1904 
1905   /* set "a" and "j" values into matrix */
1906   nzcount = 0; jcount = 0;
1907   for (i=0; i<mbs; i++) {
1908     nzcountb = nzcount;
1909     nmask    = 0;
1910     for (j=0; j<bs; j++) {
1911       kmax = rowlengths[i*bs+j];
1912       for (k=0; k<kmax; k++) {
1913         tmp = jj[nzcount++]/bs; /* block col. index */
1914         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1915       }
1916     }
1917     /* sort the masked values */
1918     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1919 
1920     /* set "j" values into matrix */
1921     maskcount = 1;
1922     for (j=0; j<nmask; j++) {
1923       a->j[jcount++]  = masked[j];
1924       mask[masked[j]] = maskcount++;
1925     }
1926 
1927     /* set "a" values into matrix */
1928     ishift = bs2*a->i[i];
1929     for (j=0; j<bs; j++) {
1930       kmax = rowlengths[i*bs+j];
1931       for (k=0; k<kmax; k++) {
1932         tmp       = jj[nzcountb]/bs ; /* block col. index */
1933         if (tmp >= i){
1934           block     = mask[tmp] - 1;
1935           point     = jj[nzcountb] - bs*tmp;
1936           idx       = ishift + bs2*block + j + bs*point;
1937           a->a[idx] = aa[nzcountb];
1938         }
1939         nzcountb++;
1940       }
1941     }
1942     /* zero out the mask elements we set */
1943     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1944   }
1945   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1946 
1947   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1948   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1949   ierr = PetscFree(aa);CHKERRQ(ierr);
1950   ierr = PetscFree(jj);CHKERRQ(ierr);
1951   ierr = PetscFree(mask);CHKERRQ(ierr);
1952 
1953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955   ierr = MatView_Private(B);CHKERRQ(ierr);
1956   *A = B;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1962 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1963 {
1964   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1965   MatScalar    *aa=a->a,*v,*v1;
1966   PetscScalar  *x,*b,*t,sum,d;
1967   PetscErrorCode ierr;
1968   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j;
1969   int          nz,nz1,*vj,*vj1,i;
1970 
1971   PetscFunctionBegin;
1972   its = its*lits;
1973   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1974 
1975   if (bs > 1)
1976     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1977 
1978   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1979   if (xx != bb) {
1980     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1981   } else {
1982     b = x;
1983   }
1984 
1985   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1986 
1987   if (flag & SOR_ZERO_INITIAL_GUESS) {
1988     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1989       for (i=0; i<m; i++)
1990         t[i] = b[i];
1991 
1992       for (i=0; i<m; i++){
1993         d  = *(aa + ai[i]);  /* diag[i] */
1994         v  = aa + ai[i] + 1;
1995         vj = aj + ai[i] + 1;
1996         nz = ai[i+1] - ai[i] - 1;
1997         x[i] = omega*t[i]/d;
1998         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1999         PetscLogFlops(2*nz-1);
2000       }
2001     }
2002 
2003     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2004       for (i=0; i<m; i++)
2005         t[i] = b[i];
2006 
2007       for (i=0; i<m-1; i++){  /* update rhs */
2008         v  = aa + ai[i] + 1;
2009         vj = aj + ai[i] + 1;
2010         nz = ai[i+1] - ai[i] - 1;
2011         while (nz--) t[*vj++] -= x[i]*(*v++);
2012         PetscLogFlops(2*nz-1);
2013       }
2014       for (i=m-1; i>=0; i--){
2015         d  = *(aa + ai[i]);
2016         v  = aa + ai[i] + 1;
2017         vj = aj + ai[i] + 1;
2018         nz = ai[i+1] - ai[i] - 1;
2019         sum = t[i];
2020         while (nz--) sum -= x[*vj++]*(*v++);
2021         PetscLogFlops(2*nz-1);
2022         x[i] =   (1-omega)*x[i] + omega*sum/d;
2023       }
2024     }
2025     its--;
2026   }
2027 
2028   while (its--) {
2029     /*
2030        forward sweep:
2031        for i=0,...,m-1:
2032          sum[i] = (b[i] - U(i,:)x )/d[i];
2033          x[i]   = (1-omega)x[i] + omega*sum[i];
2034          b      = b - x[i]*U^T(i,:);
2035 
2036     */
2037     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2038       for (i=0; i<m; i++)
2039         t[i] = b[i];
2040 
2041       for (i=0; i<m; i++){
2042         d  = *(aa + ai[i]);  /* diag[i] */
2043         v  = aa + ai[i] + 1; v1=v;
2044         vj = aj + ai[i] + 1; vj1=vj;
2045         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2046         sum = t[i];
2047         while (nz1--) sum -= (*v1++)*x[*vj1++];
2048         x[i] = (1-omega)*x[i] + omega*sum/d;
2049         while (nz--) t[*vj++] -= x[i]*(*v++);
2050         PetscLogFlops(4*nz-2);
2051       }
2052     }
2053 
2054   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2055       /*
2056        backward sweep:
2057        b = b - x[i]*U^T(i,:), i=0,...,n-2
2058        for i=m-1,...,0:
2059          sum[i] = (b[i] - U(i,:)x )/d[i];
2060          x[i]   = (1-omega)x[i] + omega*sum[i];
2061       */
2062       for (i=0; i<m; i++)
2063         t[i] = b[i];
2064 
2065       for (i=0; i<m-1; i++){  /* update rhs */
2066         v  = aa + ai[i] + 1;
2067         vj = aj + ai[i] + 1;
2068         nz = ai[i+1] - ai[i] - 1;
2069         while (nz--) t[*vj++] -= x[i]*(*v++);
2070         PetscLogFlops(2*nz-1);
2071       }
2072       for (i=m-1; i>=0; i--){
2073         d  = *(aa + ai[i]);
2074         v  = aa + ai[i] + 1;
2075         vj = aj + ai[i] + 1;
2076         nz = ai[i+1] - ai[i] - 1;
2077         sum = t[i];
2078         while (nz--) sum -= x[*vj++]*(*v++);
2079         PetscLogFlops(2*nz-1);
2080         x[i] =   (1-omega)*x[i] + omega*sum/d;
2081       }
2082     }
2083   }
2084 
2085   ierr = PetscFree(t);CHKERRQ(ierr);
2086   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2087   if (bb != xx) {
2088     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2089   }
2090 
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 
2095 
2096 
2097 
2098 
2099