xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 4e5e7fe449b7c11ceb24afe5a5284bb8387e21ce)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
294 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
297   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
298   char              *name;
299   PetscViewerFormat format;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
303   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
304   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
305     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
306   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
307     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
308   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
309     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
310     for (i=0; i<a->mbs; i++) {
311       for (j=0; j<bs; j++) {
312         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
313         for (k=a->i[i]; k<a->i[i+1]; k++) {
314           for (l=0; l<bs; l++) {
315 #if defined(PETSC_USE_COMPLEX)
316             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
317               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
318                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
319             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
320               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
321                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
322             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             }
325 #else
326             if (a->a[bs2*k + l*bs + j] != 0.0) {
327               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
328             }
329 #endif
330           }
331         }
332         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
333       }
334     }
335     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
336   } else {
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
338     for (i=0; i<a->mbs; i++) {
339       for (j=0; j<bs; j++) {
340         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
341         for (k=a->i[i]; k<a->i[i+1]; k++) {
342           for (l=0; l<bs; l++) {
343 #if defined(PETSC_USE_COMPLEX)
344             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
346                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
348               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
349                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
350             } else {
351               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             }
353 #else
354             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
355 #endif
356           }
357         }
358         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
359       }
360     }
361     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
362   }
363   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
364   PetscFunctionReturn(0);
365 }
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
369 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
370 {
371   Mat          A = (Mat) Aa;
372   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
373   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
374   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
375   MatScalar    *aa;
376   MPI_Comm     comm;
377   PetscViewer  viewer;
378 
379   PetscFunctionBegin;
380   /*
381       This is nasty. If this is called from an originally parallel matrix
382    then all processes call this,but only the first has the matrix so the
383    rest should return immediately.
384   */
385   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
386   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
387   if (rank) PetscFunctionReturn(0);
388 
389   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
390 
391   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
392   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
393 
394   /* loop over matrix elements drawing boxes */
395   color = PETSC_DRAW_BLUE;
396   for (i=0,row=0; i<mbs; i++,row+=bs) {
397     for (j=a->i[i]; j<a->i[i+1]; j++) {
398       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
399       x_l = a->j[j]*bs; x_r = x_l + 1.0;
400       aa = a->a + j*bs2;
401       for (k=0; k<bs; k++) {
402         for (l=0; l<bs; l++) {
403           if (PetscRealPart(*aa++) >=  0.) continue;
404           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
405         }
406       }
407     }
408   }
409   color = PETSC_DRAW_CYAN;
410   for (i=0,row=0; i<mbs; i++,row+=bs) {
411     for (j=a->i[i]; j<a->i[i+1]; j++) {
412       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
413       x_l = a->j[j]*bs; x_r = x_l + 1.0;
414       aa = a->a + j*bs2;
415       for (k=0; k<bs; k++) {
416         for (l=0; l<bs; l++) {
417           if (PetscRealPart(*aa++) != 0.) continue;
418           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
419         }
420       }
421     }
422   }
423 
424   color = PETSC_DRAW_RED;
425   for (i=0,row=0; i<mbs; i++,row+=bs) {
426     for (j=a->i[i]; j<a->i[i+1]; j++) {
427       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
428       x_l = a->j[j]*bs; x_r = x_l + 1.0;
429       aa = a->a + j*bs2;
430       for (k=0; k<bs; k++) {
431         for (l=0; l<bs; l++) {
432           if (PetscRealPart(*aa++) <= 0.) continue;
433           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
434         }
435       }
436     }
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
443 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
444 {
445   int          ierr;
446   PetscReal    xl,yl,xr,yr,w,h;
447   PetscDraw    draw;
448   PetscTruth   isnull;
449 
450   PetscFunctionBegin;
451 
452   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
453   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
454 
455   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
456   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
457   xr += w;    yr += h;  xl = -w;     yl = -h;
458   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
459   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
460   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatView_SeqSBAIJ"
466 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
467 {
468   int        ierr;
469   PetscTruth isascii,isdraw;
470 
471   PetscFunctionBegin;
472   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
473   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
474   if (isascii){
475     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
476   } else if (isdraw) {
477     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
478   } else {
479     Mat B;
480     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
481     ierr = MatView(B,viewer);CHKERRQ(ierr);
482     ierr = MatDestroy(B);CHKERRQ(ierr);
483   }
484   PetscFunctionReturn(0);
485 }
486 
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
490 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
491 {
492   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
493   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
494   int        *ai = a->i,*ailen = a->ilen;
495   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
496   MatScalar  *ap,*aa = a->a,zero = 0.0;
497 
498   PetscFunctionBegin;
499   for (k=0; k<m; k++) { /* loop over rows */
500     row  = im[k]; brow = row/bs;
501     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
502     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
503     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
504     nrow = ailen[brow];
505     for (l=0; l<n; l++) { /* loop over columns */
506       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
507       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
508       col  = in[l] ;
509       bcol = col/bs;
510       cidx = col%bs;
511       ridx = row%bs;
512       high = nrow;
513       low  = 0; /* assume unsorted */
514       while (high-low > 5) {
515         t = (low+high)/2;
516         if (rp[t] > bcol) high = t;
517         else             low  = t;
518       }
519       for (i=low; i<high; i++) {
520         if (rp[i] > bcol) break;
521         if (rp[i] == bcol) {
522           *v++ = ap[bs2*i+bs*cidx+ridx];
523           goto finished;
524         }
525       }
526       *v++ = zero;
527       finished:;
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
536 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
537 {
538   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
539   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
540   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
541   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
542   PetscTruth      roworiented=a->roworiented;
543   const MatScalar *value = v;
544   MatScalar       *ap,*aa = a->a,*bap;
545 
546   PetscFunctionBegin;
547   if (roworiented) {
548     stepval = (n-1)*bs;
549   } else {
550     stepval = (m-1)*bs;
551   }
552   for (k=0; k<m; k++) { /* loop over added rows */
553     row  = im[k];
554     if (row < 0) continue;
555 #if defined(PETSC_USE_BOPT_g)
556     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
557 #endif
558     rp   = aj + ai[row];
559     ap   = aa + bs2*ai[row];
560     rmax = imax[row];
561     nrow = ailen[row];
562     low  = 0;
563     for (l=0; l<n; l++) { /* loop over added columns */
564       if (in[l] < 0) continue;
565       col = in[l];
566 #if defined(PETSC_USE_BOPT_g)
567       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
568 #endif
569       if (col < row) continue; /* ignore lower triangular block */
570       if (roworiented) {
571         value = v + k*(stepval+bs)*bs + l*bs;
572       } else {
573         value = v + l*(stepval+bs)*bs + k*bs;
574       }
575       if (!sorted) low = 0; high = nrow;
576       while (high-low > 7) {
577         t = (low+high)/2;
578         if (rp[t] > col) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > col) break;
583         if (rp[i] == col) {
584           bap  = ap +  bs2*i;
585           if (roworiented) {
586             if (is == ADD_VALUES) {
587               for (ii=0; ii<bs; ii++,value+=stepval) {
588                 for (jj=ii; jj<bs2; jj+=bs) {
589                   bap[jj] += *value++;
590                 }
591               }
592             } else {
593               for (ii=0; ii<bs; ii++,value+=stepval) {
594                 for (jj=ii; jj<bs2; jj+=bs) {
595                   bap[jj] = *value++;
596                 }
597               }
598             }
599           } else {
600             if (is == ADD_VALUES) {
601               for (ii=0; ii<bs; ii++,value+=stepval) {
602                 for (jj=0; jj<bs; jj++) {
603                   *bap++ += *value++;
604                 }
605               }
606             } else {
607               for (ii=0; ii<bs; ii++,value+=stepval) {
608                 for (jj=0; jj<bs; jj++) {
609                   *bap++  = *value++;
610                 }
611               }
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       if (nonew == 1) goto noinsert2;
618       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
619       if (nrow >= rmax) {
620         /* there is no extra room in row, therefore enlarge */
621         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
622         MatScalar *new_a;
623 
624         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
625 
626         /* malloc new storage space */
627         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
628 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
629         new_j   = (int*)(new_a + bs2*new_nz);
630         new_i   = new_j + new_nz;
631 
632         /* copy over old data into new slots */
633         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
634         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
635         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
636         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
637         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
638         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
640         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
641         /* free up old matrix storage */
642         ierr = PetscFree(a->a);CHKERRQ(ierr);
643         if (!a->singlemalloc) {
644           ierr = PetscFree(a->i);CHKERRQ(ierr);
645           ierr = PetscFree(a->j);CHKERRQ(ierr);
646         }
647         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
648         a->singlemalloc = PETSC_TRUE;
649 
650         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
651         rmax = imax[row] = imax[row] + CHUNKSIZE;
652         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
653         a->maxnz += bs2*CHUNKSIZE;
654         a->reallocs++;
655         a->nz++;
656       }
657       N = nrow++ - 1;
658       /* shift up all the later entries in this row */
659       for (ii=N; ii>=i; ii--) {
660         rp[ii+1] = rp[ii];
661         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
662       }
663       if (N >= i) {
664         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
665       }
666       rp[i] = col;
667       bap   = ap +  bs2*i;
668       if (roworiented) {
669         for (ii=0; ii<bs; ii++,value+=stepval) {
670           for (jj=ii; jj<bs2; jj+=bs) {
671             bap[jj] = *value++;
672           }
673         }
674       } else {
675         for (ii=0; ii<bs; ii++,value+=stepval) {
676           for (jj=0; jj<bs; jj++) {
677             *bap++  = *value++;
678           }
679         }
680       }
681       noinsert2:;
682       low = i;
683     }
684     ailen[row] = nrow;
685   }
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
691 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
692 {
693   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
694   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695   int        m = A->m,*ip,N,*ailen = a->ilen;
696   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
697   MatScalar  *aa = a->a,*ap;
698 
699   PetscFunctionBegin;
700   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
701 
702   if (m) rmax = ailen[0];
703   for (i=1; i<mbs; i++) {
704     /* move each row back by the amount of empty slots (fshift) before it*/
705     fshift += imax[i-1] - ailen[i-1];
706     rmax   = PetscMax(rmax,ailen[i]);
707     if (fshift) {
708       ip = aj + ai[i]; ap = aa + bs2*ai[i];
709       N = ailen[i];
710       for (j=0; j<N; j++) {
711         ip[j-fshift] = ip[j];
712         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
713       }
714     }
715     ai[i] = ai[i-1] + ailen[i-1];
716   }
717   if (mbs) {
718     fshift += imax[mbs-1] - ailen[mbs-1];
719     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
720   }
721   /* reset ilen and imax for each row */
722   for (i=0; i<mbs; i++) {
723     ailen[i] = imax[i] = ai[i+1] - ai[i];
724   }
725   a->nz = ai[mbs];
726 
727   /* diagonals may have moved, reset it */
728   if (a->diag) {
729     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
730   }
731   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
732            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
734            a->reallocs);
735   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
736   a->reallocs          = 0;
737   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
738 
739   PetscFunctionReturn(0);
740 }
741 
742 /*
743    This function returns an array of flags which indicate the locations of contiguous
744    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
745    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
746    Assume: sizes should be long enough to hold all the values.
747 */
748 #undef __FUNCT__
749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
750 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
751 {
752   int        i,j,k,row;
753   PetscTruth flg;
754 
755   PetscFunctionBegin;
756   for (i=0,j=0; i<n; j++) {
757     row = idx[i];
758     if (row%bs!=0) { /* Not the begining of a block */
759       sizes[j] = 1;
760       i++;
761     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
762       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
763       i++;
764     } else { /* Begining of the block, so check if the complete block exists */
765       flg = PETSC_TRUE;
766       for (k=1; k<bs; k++) {
767         if (row+k != idx[i+k]) { /* break in the block */
768           flg = PETSC_FALSE;
769           break;
770         }
771       }
772       if (flg == PETSC_TRUE) { /* No break in the bs */
773         sizes[j] = bs;
774         i+= bs;
775       } else {
776         sizes[j] = 1;
777         i++;
778       }
779     }
780   }
781   *bs_max = j;
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
787 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
788 {
789   PetscFunctionBegin;
790   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
791 }
792 
793 /* Only add/insert a(i,j) with i<=j (blocks).
794    Any a(i,j) with i>j input by user is ingored.
795 */
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
799 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
800 {
801   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
802   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
803   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
804   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
805   int         ridx,cidx,bs2=a->bs2,ierr;
806   MatScalar   *ap,value,*aa=a->a,*bap;
807 
808   PetscFunctionBegin;
809 
810   for (k=0; k<m; k++) { /* loop over added rows */
811     row  = im[k];       /* row number */
812     brow = row/bs;      /* block row number */
813     if (row < 0) continue;
814 #if defined(PETSC_USE_BOPT_g)
815     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
816 #endif
817     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
818     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
819     rmax = imax[brow];  /* maximum space allocated for this row */
820     nrow = ailen[brow]; /* actual length of this row */
821     low  = 0;
822 
823     for (l=0; l<n; l++) { /* loop over added columns */
824       if (in[l] < 0) continue;
825 #if defined(PETSC_USE_BOPT_g)
826       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
827 #endif
828       col = in[l];
829       bcol = col/bs;              /* block col number */
830 
831       if (brow <= bcol){
832         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
833         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
834           /* element value a(k,l) */
835           if (roworiented) {
836             value = v[l + k*n];
837           } else {
838             value = v[k + l*m];
839           }
840 
841           /* move pointer bap to a(k,l) quickly and add/insert value */
842           if (!sorted) low = 0; high = nrow;
843           while (high-low > 7) {
844             t = (low+high)/2;
845             if (rp[t] > bcol) high = t;
846             else              low  = t;
847           }
848           for (i=low; i<high; i++) {
849             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
850             if (rp[i] > bcol) break;
851             if (rp[i] == bcol) {
852               bap  = ap +  bs2*i + bs*cidx + ridx;
853               if (is == ADD_VALUES) *bap += value;
854               else                  *bap  = value;
855               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
856               if (brow == bcol && ridx < cidx){
857                 bap  = ap +  bs2*i + bs*ridx + cidx;
858                 if (is == ADD_VALUES) *bap += value;
859                 else                  *bap  = value;
860               }
861               goto noinsert1;
862             }
863           }
864 
865       if (nonew == 1) goto noinsert1;
866       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
867       if (nrow >= rmax) {
868         /* there is no extra room in row, therefore enlarge */
869         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
870         MatScalar *new_a;
871 
872         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873 
874         /* Malloc new storage space */
875         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
876         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
877         new_j = (int*)(new_a + bs2*new_nz);
878         new_i = new_j + new_nz;
879 
880         /* copy over old data into new slots */
881         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
882         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
883         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
884         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
885         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
888         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
889         /* free up old matrix storage */
890         ierr = PetscFree(a->a);CHKERRQ(ierr);
891         if (!a->singlemalloc) {
892           ierr = PetscFree(a->i);CHKERRQ(ierr);
893           ierr = PetscFree(a->j);CHKERRQ(ierr);
894         }
895         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
896         a->singlemalloc = PETSC_TRUE;
897 
898         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
899         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
900         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
901         a->maxnz += bs2*CHUNKSIZE;
902         a->reallocs++;
903         a->nz++;
904       }
905 
906       N = nrow++ - 1;
907       /* shift up all the later entries in this row */
908       for (ii=N; ii>=i; ii--) {
909         rp[ii+1] = rp[ii];
910         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
911       }
912       if (N>=i) {
913         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
914       }
915       rp[i]                      = bcol;
916       ap[bs2*i + bs*cidx + ridx] = value;
917       noinsert1:;
918       low = i;
919       /* } */
920         }
921       } /* end of if .. if..  */
922     }                     /* end of loop over added columns */
923     ailen[brow] = nrow;
924   }                       /* end of loop over added rows */
925 
926   PetscFunctionReturn(0);
927 }
928 
929 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
930 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
931 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
932 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
933 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
934 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
935 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
936 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
937 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
938 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
939 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
940 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
941 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
942 extern int MatZeroEntries_SeqSBAIJ(Mat);
943 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
944 
945 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
951 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
952 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
953 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
954 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
955 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
956 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
957 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
958 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
959 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
960 
961 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
962 
963 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
964 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
965 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
966 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
967 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
968 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
969 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
970 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
971 
972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
979 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
980 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
981 
982 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
983 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
984 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
985 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
986 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
987 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
988 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
989 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
990 
991 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
992 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
993 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
994 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
995 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
996 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
997 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
998 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
999 
1000 #ifdef HAVE_MatICCFactor
1001 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1004 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1005 {
1006   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1007   Mat         outA;
1008   int         ierr;
1009   PetscTruth  row_identity,col_identity;
1010 
1011   PetscFunctionBegin;
1012   /*
1013   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1014   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1015   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1016   if (!row_identity || !col_identity) {
1017     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1018   }
1019   */
1020 
1021   outA          = inA;
1022   inA->factor   = FACTOR_CHOLESKY;
1023 
1024   if (!a->diag) {
1025     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1026   }
1027   /*
1028       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1029       for ILU(0) factorization with natural ordering
1030   */
1031   switch (a->bs) {
1032   case 1:
1033     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1034     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1035     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1036     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1037   case 2:
1038     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1039     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1040     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1041     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1042     break;
1043   case 3:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1048     break;
1049   case 4:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1054     break;
1055   case 5:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1060     break;
1061   case 6:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1066     break;
1067   case 7:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1069     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1070     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1072     break;
1073   default:
1074     a->row        = row;
1075     a->icol       = col;
1076     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1077     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1078 
1079     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1080     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1081     PetscLogObjectParent(inA,a->icol);
1082 
1083     if (!a->solve_work) {
1084       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1085       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1086     }
1087   }
1088 
1089   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1090 
1091   PetscFunctionReturn(0);
1092 }
1093 #endif
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1097 int MatPrintHelp_SeqSBAIJ(Mat A)
1098 {
1099   static PetscTruth called = PETSC_FALSE;
1100   MPI_Comm          comm = A->comm;
1101   int               ierr;
1102 
1103   PetscFunctionBegin;
1104   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1105   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1106   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 EXTERN_C_BEGIN
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1113 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1114 {
1115   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1116   int         i,nz,n;
1117 
1118   PetscFunctionBegin;
1119   nz = baij->maxnz;
1120   n  = mat->n;
1121   for (i=0; i<nz; i++) {
1122     baij->j[i] = indices[i];
1123   }
1124   baij->nz = nz;
1125   for (i=0; i<n; i++) {
1126     baij->ilen[i] = baij->imax[i];
1127   }
1128 
1129   PetscFunctionReturn(0);
1130 }
1131 EXTERN_C_END
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1135 /*@
1136     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1137        in the matrix.
1138 
1139   Input Parameters:
1140 +  mat     - the SeqSBAIJ matrix
1141 -  indices - the column indices
1142 
1143   Level: advanced
1144 
1145   Notes:
1146     This can be called if you have precomputed the nonzero structure of the
1147   matrix and want to provide it to the matrix object to improve the performance
1148   of the MatSetValues() operation.
1149 
1150     You MUST have set the correct numbers of nonzeros per row in the call to
1151   MatCreateSeqSBAIJ().
1152 
1153     MUST be called before any calls to MatSetValues()
1154 
1155 .seealso: MatCreateSeqSBAIJ
1156 @*/
1157 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1158 {
1159   int ierr,(*f)(Mat,int *);
1160 
1161   PetscFunctionBegin;
1162   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1163   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1164   if (f) {
1165     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1166   } else {
1167     SETERRQ(1,"Wrong type of matrix to set column indices");
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1174 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1175 {
1176   int        ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1185 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1186 {
1187   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1188   PetscFunctionBegin;
1189   *array = a->a;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1195 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1196 {
1197   PetscFunctionBegin;
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #include "petscblaslapack.h"
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1204 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1205 {
1206   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1207   int          ierr,one=1,i,bs=y->bs,bs2,j;
1208 
1209   PetscFunctionBegin;
1210   if (str == SAME_NONZERO_PATTERN) {
1211     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1212   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1213     if (y->xtoy && y->XtoY != X) {
1214       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1215       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1216     }
1217     if (!y->xtoy) { /* get xtoy */
1218       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1219       y->XtoY = X;
1220     }
1221     bs2 = bs*bs;
1222     for (i=0; i<x->nz; i++) {
1223       j = 0;
1224       while (j < bs2){
1225         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1226         j++;
1227       }
1228     }
1229     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1230   } else {
1231     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1238 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1239 {
1240   PetscFunctionBegin;
1241   *flg = PETSC_TRUE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1247 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1248 {
1249   PetscFunctionBegin;
1250   *flg = PETSC_TRUE;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1256 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1257 {
1258   PetscFunctionBegin;
1259   *flg = PETSC_FALSE;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /* -------------------------------------------------------------------*/
1264 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1265        MatGetRow_SeqSBAIJ,
1266        MatRestoreRow_SeqSBAIJ,
1267        MatMult_SeqSBAIJ_N,
1268 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1269        MatMultTranspose_SeqSBAIJ,
1270        MatMultTransposeAdd_SeqSBAIJ,
1271        MatSolve_SeqSBAIJ_N,
1272        0,
1273        0,
1274 /*10*/ 0,
1275        0,
1276        MatCholeskyFactor_SeqSBAIJ,
1277        MatRelax_SeqSBAIJ,
1278        MatTranspose_SeqSBAIJ,
1279 /*15*/ MatGetInfo_SeqSBAIJ,
1280        MatEqual_SeqSBAIJ,
1281        MatGetDiagonal_SeqSBAIJ,
1282        MatDiagonalScale_SeqSBAIJ,
1283        MatNorm_SeqSBAIJ,
1284 /*20*/ 0,
1285        MatAssemblyEnd_SeqSBAIJ,
1286        0,
1287        MatSetOption_SeqSBAIJ,
1288        MatZeroEntries_SeqSBAIJ,
1289 /*25*/ MatZeroRows_SeqSBAIJ,
1290        0,
1291        0,
1292        MatCholeskyFactorSymbolic_SeqSBAIJ,
1293        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1294 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1295        0,
1296        MatICCFactorSymbolic_SeqSBAIJ,
1297        MatGetArray_SeqSBAIJ,
1298        MatRestoreArray_SeqSBAIJ,
1299 /*35*/ MatDuplicate_SeqSBAIJ,
1300        0,
1301        0,
1302        0,
1303        0,
1304 /*40*/ MatAXPY_SeqSBAIJ,
1305        MatGetSubMatrices_SeqSBAIJ,
1306        MatIncreaseOverlap_SeqSBAIJ,
1307        MatGetValues_SeqSBAIJ,
1308        0,
1309 /*45*/ MatPrintHelp_SeqSBAIJ,
1310        MatScale_SeqSBAIJ,
1311        0,
1312        0,
1313        0,
1314 /*50*/ MatGetBlockSize_SeqSBAIJ,
1315        MatGetRowIJ_SeqSBAIJ,
1316        MatRestoreRowIJ_SeqSBAIJ,
1317        0,
1318        0,
1319 /*55*/ 0,
1320        0,
1321        0,
1322        0,
1323        MatSetValuesBlocked_SeqSBAIJ,
1324 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1325        0,
1326        0,
1327        MatGetPetscMaps_Petsc,
1328        0,
1329 /*65*/ 0,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*70*/ MatGetRowMax_SeqSBAIJ,
1335        0,
1336        0,
1337        0,
1338        0,
1339 /*75*/ 0,
1340        0,
1341        0,
1342        0,
1343        0,
1344 /*80*/ 0,
1345        0,
1346        0,
1347 #if !defined(PETSC_USE_COMPLEX)
1348        MatGetInertia_SeqSBAIJ,
1349 #else
1350        0,
1351 #endif
1352 /*85*/ MatLoad_SeqSBAIJ,
1353        MatIsSymmetric_SeqSBAIJ,
1354        MatIsStructurallySymmetric_SeqSBAIJ,
1355        MatIsHermitian_SeqSBAIJ
1356 };
1357 
1358 EXTERN_C_BEGIN
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1361 int MatStoreValues_SeqSBAIJ(Mat mat)
1362 {
1363   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1364   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1365   int         ierr;
1366 
1367   PetscFunctionBegin;
1368   if (aij->nonew != 1) {
1369     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1370   }
1371 
1372   /* allocate space for values if not already there */
1373   if (!aij->saved_values) {
1374     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1375   }
1376 
1377   /* copy values over */
1378   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 EXTERN_C_END
1382 
1383 EXTERN_C_BEGIN
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1386 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1387 {
1388   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1389   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1390 
1391   PetscFunctionBegin;
1392   if (aij->nonew != 1) {
1393     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1394   }
1395   if (!aij->saved_values) {
1396     SETERRQ(1,"Must call MatStoreValues(A);first");
1397   }
1398 
1399   /* copy values over */
1400   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 EXTERN_C_END
1404 
1405 EXTERN_C_BEGIN
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1408 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1409 {
1410   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1411   int          i,len,ierr,mbs,bs2;
1412   PetscTruth   flg;
1413 
1414   PetscFunctionBegin;
1415   B->preallocated = PETSC_TRUE;
1416   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1417   mbs  = B->m/bs;
1418   bs2  = bs*bs;
1419 
1420   if (mbs*bs != B->m) {
1421     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1422   }
1423 
1424   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1425   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1426   if (nnz) {
1427     for (i=0; i<mbs; i++) {
1428       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1429       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1430     }
1431   }
1432 
1433   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1434   if (!flg) {
1435     switch (bs) {
1436     case 1:
1437       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1438       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1439       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1440       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1441       B->ops->mult            = MatMult_SeqSBAIJ_1;
1442       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1443       break;
1444     case 2:
1445       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1446       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1447       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1448       B->ops->mult            = MatMult_SeqSBAIJ_2;
1449       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1450       break;
1451     case 3:
1452       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1453       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1454       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1455       B->ops->mult            = MatMult_SeqSBAIJ_3;
1456       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1457       break;
1458     case 4:
1459       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1460       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1461       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1462       B->ops->mult            = MatMult_SeqSBAIJ_4;
1463       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1464       break;
1465     case 5:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1468       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1469       B->ops->mult            = MatMult_SeqSBAIJ_5;
1470       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1471       break;
1472     case 6:
1473       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1474       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1475       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1476       B->ops->mult            = MatMult_SeqSBAIJ_6;
1477       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1478       break;
1479     case 7:
1480       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1481       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1482       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1483       B->ops->mult            = MatMult_SeqSBAIJ_7;
1484       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1485       break;
1486     }
1487   }
1488 
1489   b->mbs = mbs;
1490   b->nbs = mbs;
1491   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1492   if (!nnz) {
1493     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1494     else if (nz <= 0)        nz = 1;
1495     for (i=0; i<mbs; i++) {
1496       b->imax[i] = nz;
1497     }
1498     nz = nz*mbs; /* total nz */
1499   } else {
1500     nz = 0;
1501     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1502   }
1503   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1504 
1505   /* allocate the matrix space */
1506   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1507   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1508   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509   b->j = (int*)(b->a + nz*bs2);
1510   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1511   b->i = b->j + nz;
1512   b->singlemalloc = PETSC_TRUE;
1513 
1514   /* pointer to beginning of each row */
1515   b->i[0] = 0;
1516   for (i=1; i<mbs+1; i++) {
1517     b->i[i] = b->i[i-1] + b->imax[i-1];
1518   }
1519 
1520   /* b->ilen will count nonzeros in each block row so far. */
1521   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1522   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1523   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1524 
1525   b->bs               = bs;
1526   b->bs2              = bs2;
1527   b->nz             = 0;
1528   b->maxnz          = nz*bs2;
1529 
1530   b->inew             = 0;
1531   b->jnew             = 0;
1532   b->anew             = 0;
1533   b->a2anew           = 0;
1534   b->permute          = PETSC_FALSE;
1535   PetscFunctionReturn(0);
1536 }
1537 EXTERN_C_END
1538 
1539 /*MC
1540    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1541    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1542 
1543    Options Database Keys:
1544 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1545 
1546   Level: beginner
1547 
1548 .seealso: MatCreateSeqSBAIJ
1549 M*/
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1554 int MatCreate_SeqSBAIJ(Mat B)
1555 {
1556   Mat_SeqSBAIJ *b;
1557   int          ierr,size;
1558 
1559   PetscFunctionBegin;
1560   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1561   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1562   B->m = B->M = PetscMax(B->m,B->M);
1563   B->n = B->N = PetscMax(B->n,B->N);
1564 
1565   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1566   B->data = (void*)b;
1567   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1568   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1569   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1570   B->ops->view        = MatView_SeqSBAIJ;
1571   B->factor           = 0;
1572   B->lupivotthreshold = 1.0;
1573   B->mapping          = 0;
1574   b->row              = 0;
1575   b->icol             = 0;
1576   b->reallocs         = 0;
1577   b->saved_values     = 0;
1578 
1579   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1580   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1581 
1582   b->sorted           = PETSC_FALSE;
1583   b->roworiented      = PETSC_TRUE;
1584   b->nonew            = 0;
1585   b->diag             = 0;
1586   b->solve_work       = 0;
1587   b->mult_work        = 0;
1588   B->spptr            = 0;
1589   b->keepzeroedrows   = PETSC_FALSE;
1590   b->xtoy             = 0;
1591   b->XtoY             = 0;
1592 
1593   b->inew             = 0;
1594   b->jnew             = 0;
1595   b->anew             = 0;
1596   b->a2anew           = 0;
1597   b->permute          = PETSC_FALSE;
1598 
1599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1600                                      "MatStoreValues_SeqSBAIJ",
1601                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1603                                      "MatRetrieveValues_SeqSBAIJ",
1604                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1606                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1607                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1609                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1610                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1612                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1613                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 EXTERN_C_END
1617 
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1620 /*@C
1621    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1622    compressed row) format.  For good matrix assembly performance the
1623    user should preallocate the matrix storage by setting the parameter nz
1624    (or the array nnz).  By setting these parameters accurately, performance
1625    during matrix assembly can be increased by more than a factor of 50.
1626 
1627    Collective on Mat
1628 
1629    Input Parameters:
1630 +  A - the symmetric matrix
1631 .  bs - size of block
1632 .  nz - number of block nonzeros per block row (same for all rows)
1633 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1634          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1635 
1636    Options Database Keys:
1637 .   -mat_no_unroll - uses code that does not unroll the loops in the
1638                      block calculations (much slower)
1639 .    -mat_block_size - size of the blocks to use
1640 
1641    Level: intermediate
1642 
1643    Notes:
1644    Specify the preallocated storage with either nz or nnz (not both).
1645    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1646    allocation.  For additional details, see the users manual chapter on
1647    matrices.
1648 
1649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1650 @*/
1651 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1652   int ierr,(*f)(Mat,int,int,const int[]);
1653 
1654   PetscFunctionBegin;
1655   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1656   if (f) {
1657     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "MatCreateSeqSBAIJ"
1664 /*@C
1665    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1666    compressed row) format.  For good matrix assembly performance the
1667    user should preallocate the matrix storage by setting the parameter nz
1668    (or the array nnz).  By setting these parameters accurately, performance
1669    during matrix assembly can be increased by more than a factor of 50.
1670 
1671    Collective on MPI_Comm
1672 
1673    Input Parameters:
1674 +  comm - MPI communicator, set to PETSC_COMM_SELF
1675 .  bs - size of block
1676 .  m - number of rows, or number of columns
1677 .  nz - number of block nonzeros per block row (same for all rows)
1678 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1679          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1680 
1681    Output Parameter:
1682 .  A - the symmetric matrix
1683 
1684    Options Database Keys:
1685 .   -mat_no_unroll - uses code that does not unroll the loops in the
1686                      block calculations (much slower)
1687 .    -mat_block_size - size of the blocks to use
1688 
1689    Level: intermediate
1690 
1691    Notes:
1692 
1693    Specify the preallocated storage with either nz or nnz (not both).
1694    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1695    allocation.  For additional details, see the users manual chapter on
1696    matrices.
1697 
1698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1699 @*/
1700 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1701 {
1702   int ierr;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1706   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1707   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1713 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1714 {
1715   Mat         C;
1716   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1717   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1718 
1719   PetscFunctionBegin;
1720   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1721 
1722   *B = 0;
1723   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1724   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1725   c    = (Mat_SeqSBAIJ*)C->data;
1726 
1727   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1728   C->preallocated   = PETSC_TRUE;
1729   C->factor         = A->factor;
1730   c->row            = 0;
1731   c->icol           = 0;
1732   c->saved_values   = 0;
1733   c->keepzeroedrows = a->keepzeroedrows;
1734   C->assembled      = PETSC_TRUE;
1735 
1736   c->bs         = a->bs;
1737   c->bs2        = a->bs2;
1738   c->mbs        = a->mbs;
1739   c->nbs        = a->nbs;
1740 
1741   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1742   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1743   for (i=0; i<mbs; i++) {
1744     c->imax[i] = a->imax[i];
1745     c->ilen[i] = a->ilen[i];
1746   }
1747 
1748   /* allocate the matrix space */
1749   c->singlemalloc = PETSC_TRUE;
1750   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1751   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1752   c->j = (int*)(c->a + nz*bs2);
1753   c->i = c->j + nz;
1754   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1755   if (mbs > 0) {
1756     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1757     if (cpvalues == MAT_COPY_VALUES) {
1758       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1759     } else {
1760       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1761     }
1762   }
1763 
1764   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1765   c->sorted      = a->sorted;
1766   c->roworiented = a->roworiented;
1767   c->nonew       = a->nonew;
1768 
1769   if (a->diag) {
1770     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1771     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1772     for (i=0; i<mbs; i++) {
1773       c->diag[i] = a->diag[i];
1774     }
1775   } else c->diag        = 0;
1776   c->nz               = a->nz;
1777   c->maxnz            = a->maxnz;
1778   c->solve_work         = 0;
1779   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1780   c->mult_work          = 0;
1781   *B = C;
1782   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1788 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1789 {
1790   Mat_SeqSBAIJ *a;
1791   Mat          B;
1792   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1793   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1794   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1795   int          *masked,nmask,tmp,bs2,ishift;
1796   PetscScalar  *aa;
1797   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1798 
1799   PetscFunctionBegin;
1800   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1801   bs2  = bs*bs;
1802 
1803   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1804   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1805   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1806   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1807   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1808   M = header[1]; N = header[2]; nz = header[3];
1809 
1810   if (header[3] < 0) {
1811     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1812   }
1813 
1814   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1815 
1816   /*
1817      This code adds extra rows to make sure the number of rows is
1818     divisible by the blocksize
1819   */
1820   mbs        = M/bs;
1821   extra_rows = bs - M + bs*(mbs);
1822   if (extra_rows == bs) extra_rows = 0;
1823   else                  mbs++;
1824   if (extra_rows) {
1825     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1826   }
1827 
1828   /* read in row lengths */
1829   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1830   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1831   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1832 
1833   /* read in column indices */
1834   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1835   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1836   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1837 
1838   /* loop over row lengths determining block row lengths */
1839   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1840   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1841   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1842   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1843   masked   = mask + mbs;
1844   rowcount = 0; nzcount = 0;
1845   for (i=0; i<mbs; i++) {
1846     nmask = 0;
1847     for (j=0; j<bs; j++) {
1848       kmax = rowlengths[rowcount];
1849       for (k=0; k<kmax; k++) {
1850         tmp = jj[nzcount++]/bs;   /* block col. index */
1851         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1852       }
1853       rowcount++;
1854     }
1855     s_browlengths[i] += nmask;
1856 
1857     /* zero out the mask elements we set */
1858     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1859   }
1860 
1861   /* create our matrix */
1862   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1863   ierr = MatSetType(B,type);CHKERRQ(ierr);
1864   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1865   a = (Mat_SeqSBAIJ*)B->data;
1866 
1867   /* set matrix "i" values */
1868   a->i[0] = 0;
1869   for (i=1; i<= mbs; i++) {
1870     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1871     a->ilen[i-1] = s_browlengths[i-1];
1872   }
1873   a->nz = a->i[mbs];
1874 
1875   /* read in nonzero values */
1876   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1877   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1878   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1879 
1880   /* set "a" and "j" values into matrix */
1881   nzcount = 0; jcount = 0;
1882   for (i=0; i<mbs; i++) {
1883     nzcountb = nzcount;
1884     nmask    = 0;
1885     for (j=0; j<bs; j++) {
1886       kmax = rowlengths[i*bs+j];
1887       for (k=0; k<kmax; k++) {
1888         tmp = jj[nzcount++]/bs; /* block col. index */
1889         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1890       }
1891     }
1892     /* sort the masked values */
1893     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1894 
1895     /* set "j" values into matrix */
1896     maskcount = 1;
1897     for (j=0; j<nmask; j++) {
1898       a->j[jcount++]  = masked[j];
1899       mask[masked[j]] = maskcount++;
1900     }
1901 
1902     /* set "a" values into matrix */
1903     ishift = bs2*a->i[i];
1904     for (j=0; j<bs; j++) {
1905       kmax = rowlengths[i*bs+j];
1906       for (k=0; k<kmax; k++) {
1907         tmp       = jj[nzcountb]/bs ; /* block col. index */
1908         if (tmp >= i){
1909           block     = mask[tmp] - 1;
1910           point     = jj[nzcountb] - bs*tmp;
1911           idx       = ishift + bs2*block + j + bs*point;
1912           a->a[idx] = aa[nzcountb];
1913         }
1914         nzcountb++;
1915       }
1916     }
1917     /* zero out the mask elements we set */
1918     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1919   }
1920   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1921 
1922   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1923   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1924   ierr = PetscFree(aa);CHKERRQ(ierr);
1925   ierr = PetscFree(jj);CHKERRQ(ierr);
1926   ierr = PetscFree(mask);CHKERRQ(ierr);
1927 
1928   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1929   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1930   ierr = MatView_Private(B);CHKERRQ(ierr);
1931   *A = B;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1937 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1938 {
1939   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1940   MatScalar    *aa=a->a,*v,*v1;
1941   PetscScalar  *x,*b,*t,sum,d;
1942   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1943   int          nz,nz1,*vj,*vj1,i;
1944 
1945   PetscFunctionBegin;
1946   its = its*lits;
1947   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1948 
1949   if (bs > 1)
1950     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1951 
1952   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1953   if (xx != bb) {
1954     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1955   } else {
1956     b = x;
1957   }
1958 
1959   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1960 
1961   if (flag & SOR_ZERO_INITIAL_GUESS) {
1962     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1963       for (i=0; i<m; i++)
1964         t[i] = b[i];
1965 
1966       for (i=0; i<m; i++){
1967         d  = *(aa + ai[i]);  /* diag[i] */
1968         v  = aa + ai[i] + 1;
1969         vj = aj + ai[i] + 1;
1970         nz = ai[i+1] - ai[i] - 1;
1971         x[i] = omega*t[i]/d;
1972         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1973         PetscLogFlops(2*nz-1);
1974       }
1975     }
1976 
1977     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1978       for (i=0; i<m; i++)
1979         t[i] = b[i];
1980 
1981       for (i=0; i<m-1; i++){  /* update rhs */
1982         v  = aa + ai[i] + 1;
1983         vj = aj + ai[i] + 1;
1984         nz = ai[i+1] - ai[i] - 1;
1985         while (nz--) t[*vj++] -= x[i]*(*v++);
1986         PetscLogFlops(2*nz-1);
1987       }
1988       for (i=m-1; i>=0; i--){
1989         d  = *(aa + ai[i]);
1990         v  = aa + ai[i] + 1;
1991         vj = aj + ai[i] + 1;
1992         nz = ai[i+1] - ai[i] - 1;
1993         sum = t[i];
1994         while (nz--) sum -= x[*vj++]*(*v++);
1995         PetscLogFlops(2*nz-1);
1996         x[i] =   (1-omega)*x[i] + omega*sum/d;
1997       }
1998     }
1999     its--;
2000   }
2001 
2002   while (its--) {
2003     /*
2004        forward sweep:
2005        for i=0,...,m-1:
2006          sum[i] = (b[i] - U(i,:)x )/d[i];
2007          x[i]   = (1-omega)x[i] + omega*sum[i];
2008          b      = b - x[i]*U^T(i,:);
2009 
2010     */
2011     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2012       for (i=0; i<m; i++)
2013         t[i] = b[i];
2014 
2015       for (i=0; i<m; i++){
2016         d  = *(aa + ai[i]);  /* diag[i] */
2017         v  = aa + ai[i] + 1; v1=v;
2018         vj = aj + ai[i] + 1; vj1=vj;
2019         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2020         sum = t[i];
2021         while (nz1--) sum -= (*v1++)*x[*vj1++];
2022         x[i] = (1-omega)*x[i] + omega*sum/d;
2023         while (nz--) t[*vj++] -= x[i]*(*v++);
2024         PetscLogFlops(4*nz-2);
2025       }
2026     }
2027 
2028   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2029       /*
2030        backward sweep:
2031        b = b - x[i]*U^T(i,:), i=0,...,n-2
2032        for i=m-1,...,0:
2033          sum[i] = (b[i] - U(i,:)x )/d[i];
2034          x[i]   = (1-omega)x[i] + omega*sum[i];
2035       */
2036       for (i=0; i<m; i++)
2037         t[i] = b[i];
2038 
2039       for (i=0; i<m-1; i++){  /* update rhs */
2040         v  = aa + ai[i] + 1;
2041         vj = aj + ai[i] + 1;
2042         nz = ai[i+1] - ai[i] - 1;
2043         while (nz--) t[*vj++] -= x[i]*(*v++);
2044         PetscLogFlops(2*nz-1);
2045       }
2046       for (i=m-1; i>=0; i--){
2047         d  = *(aa + ai[i]);
2048         v  = aa + ai[i] + 1;
2049         vj = aj + ai[i] + 1;
2050         nz = ai[i+1] - ai[i] - 1;
2051         sum = t[i];
2052         while (nz--) sum -= x[*vj++]*(*v++);
2053         PetscLogFlops(2*nz-1);
2054         x[i] =   (1-omega)*x[i] + omega*sum/d;
2055       }
2056     }
2057   }
2058 
2059   ierr = PetscFree(t); CHKERRQ(ierr);
2060   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2061   if (bb != xx) {
2062     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2063   }
2064 
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 
2069 
2070 
2071 
2072 
2073