xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 int MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   int         *diag,*jj = a->j,i,ierr;
21 
22   PetscFunctionBegin;
23   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
24   diag = a->diag;
25   for (i=0; i<a->mbs; i++) {
26     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
27   }
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
33 int MatMarkDiagonal_SeqSBAIJ(Mat A)
34 {
35   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
36   int          i,mbs = a->mbs,ierr;
37 
38   PetscFunctionBegin;
39   if (a->diag) PetscFunctionReturn(0);
40 
41   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
42   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
43   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
44   PetscFunctionReturn(0);
45 }
46 
47 #undef __FUNCT__
48 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
49 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
50 {
51   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
52   int         n = a->mbs,i;
53 
54   PetscFunctionBegin;
55   *nn = n;
56   if (!ia) PetscFunctionReturn(0);
57 
58   if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n];
61     for (i=0; i<nz; i++) a->j[i]++;
62     for (i=0; i<n+1; i++) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67   PetscFunctionReturn(0);
68 }
69 
70 #undef __FUNCT__
71 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
72 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
73 {
74   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
75   int         i,n = a->mbs;
76 
77   PetscFunctionBegin;
78   if (!ia) PetscFunctionReturn(0);
79 
80   if (oshift == 1) {
81     int nz = a->i[n]-1;
82     for (i=0; i<nz; i++) a->j[i]--;
83     for (i=0; i<n+1; i++) a->i[i]--;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107 #if defined(PETSC_USE_LOG)
108   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
109 #endif
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
122   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
123   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
124   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
125   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
126   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
127 
128   if (a->inew){
129     ierr = PetscFree(a->inew);CHKERRQ(ierr);
130     a->inew = 0;
131   }
132   ierr = PetscFree(a);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
139 {
140   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
141 
142   PetscFunctionBegin;
143   switch (op) {
144   case MAT_ROW_ORIENTED:
145     a->roworiented    = PETSC_TRUE;
146     break;
147   case MAT_COLUMN_ORIENTED:
148     a->roworiented    = PETSC_FALSE;
149     break;
150   case MAT_COLUMNS_SORTED:
151     a->sorted         = PETSC_TRUE;
152     break;
153   case MAT_COLUMNS_UNSORTED:
154     a->sorted         = PETSC_FALSE;
155     break;
156   case MAT_KEEP_ZEROED_ROWS:
157     a->keepzeroedrows = PETSC_TRUE;
158     break;
159   case MAT_NO_NEW_NONZERO_LOCATIONS:
160     a->nonew          = 1;
161     break;
162   case MAT_NEW_NONZERO_LOCATION_ERR:
163     a->nonew          = -1;
164     break;
165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
166     a->nonew          = -2;
167     break;
168   case MAT_YES_NEW_NONZERO_LOCATIONS:
169     a->nonew          = 0;
170     break;
171   case MAT_ROWS_SORTED:
172   case MAT_ROWS_UNSORTED:
173   case MAT_YES_NEW_DIAGONALS:
174   case MAT_IGNORE_OFF_PROC_ENTRIES:
175   case MAT_USE_HASH_TABLE:
176     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
177     break;
178   case MAT_NO_NEW_DIAGONALS:
179     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
180   case MAT_NOT_SYMMETRIC:
181   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
182   case MAT_HERMITIAN:
183     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
184   case MAT_SYMMETRIC:
185   case MAT_STRUCTURALLY_SYMMETRIC:
186   case MAT_NOT_HERMITIAN:
187   case MAT_SYMMETRY_ETERNAL:
188   case MAT_NOT_SYMMETRY_ETERNAL:
189     break;
190   default:
191     SETERRQ(PETSC_ERR_SUP,"unknown option");
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
198 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
199 {
200   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
201   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
202   MatScalar    *aa,*aa_i;
203   PetscScalar  *v_i;
204 
205   PetscFunctionBegin;
206   bs  = a->bs;
207   ai  = a->i;
208   aj  = a->j;
209   aa  = a->a;
210   bs2 = a->bs2;
211 
212   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
213 
214   bn  = row/bs;   /* Block number */
215   bp  = row % bs; /* Block position */
216   M   = ai[bn+1] - ai[bn];
217   *ncols = bs*M;
218 
219   if (v) {
220     *v = 0;
221     if (*ncols) {
222       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
223       for (i=0; i<M; i++) { /* for each block in the block row */
224         v_i  = *v + i*bs;
225         aa_i = aa + bs2*(ai[bn] + i);
226         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
227       }
228     }
229   }
230 
231   if (cols) {
232     *cols = 0;
233     if (*ncols) {
234       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
235       for (i=0; i<M; i++) { /* for each block in the block row */
236         cols_i = *cols + i*bs;
237         itmp  = bs*aj[ai[bn] + i];
238         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
239       }
240     }
241   }
242 
243   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
244   /* this segment is currently removed, so only entries in the upper triangle are obtained */
245 #ifdef column_search
246   v_i    = *v    + M*bs;
247   cols_i = *cols + M*bs;
248   for (i=0; i<bn; i++){ /* for each block row */
249     M = ai[i+1] - ai[i];
250     for (j=0; j<M; j++){
251       itmp = aj[ai[i] + j];    /* block column value */
252       if (itmp == bn){
253         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
254         for (k=0; k<bs; k++) {
255           *cols_i++ = i*bs+k;
256           *v_i++    = aa_i[k];
257         }
258         *ncols += bs;
259         break;
260       }
261     }
262   }
263 #endif
264 
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
270 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
271 {
272   int ierr;
273 
274   PetscFunctionBegin;
275   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
276   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
282 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
283 {
284   int ierr;
285   PetscFunctionBegin;
286   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
292 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
293 {
294   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
295   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
296   char              *name;
297   PetscViewerFormat format;
298 
299   PetscFunctionBegin;
300   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
301   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
302   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
303     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
304   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
305     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
306   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
307     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
308     for (i=0; i<a->mbs; i++) {
309       for (j=0; j<bs; j++) {
310         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
311         for (k=a->i[i]; k<a->i[i+1]; k++) {
312           for (l=0; l<bs; l++) {
313 #if defined(PETSC_USE_COMPLEX)
314             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
315               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
316                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
317             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
318               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
319                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
320             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
321               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
322             }
323 #else
324             if (a->a[bs2*k + l*bs + j] != 0.0) {
325               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
326             }
327 #endif
328           }
329         }
330         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
331       }
332     }
333     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
334   } else {
335     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
336     for (i=0; i<a->mbs; i++) {
337       for (j=0; j<bs; j++) {
338         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
339         for (k=a->i[i]; k<a->i[i+1]; k++) {
340           for (l=0; l<bs; l++) {
341 #if defined(PETSC_USE_COMPLEX)
342             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
343               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
344                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
345             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
346               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
347                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
348             } else {
349               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
350             }
351 #else
352             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
353 #endif
354           }
355         }
356         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
357       }
358     }
359     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
360   }
361   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
367 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
368 {
369   Mat          A = (Mat) Aa;
370   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
371   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
372   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
373   MatScalar    *aa;
374   MPI_Comm     comm;
375   PetscViewer  viewer;
376 
377   PetscFunctionBegin;
378   /*
379       This is nasty. If this is called from an originally parallel matrix
380    then all processes call this,but only the first has the matrix so the
381    rest should return immediately.
382   */
383   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
384   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
385   if (rank) PetscFunctionReturn(0);
386 
387   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
388 
389   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
390   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
391 
392   /* loop over matrix elements drawing boxes */
393   color = PETSC_DRAW_BLUE;
394   for (i=0,row=0; i<mbs; i++,row+=bs) {
395     for (j=a->i[i]; j<a->i[i+1]; j++) {
396       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
397       x_l = a->j[j]*bs; x_r = x_l + 1.0;
398       aa = a->a + j*bs2;
399       for (k=0; k<bs; k++) {
400         for (l=0; l<bs; l++) {
401           if (PetscRealPart(*aa++) >=  0.) continue;
402           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
403         }
404       }
405     }
406   }
407   color = PETSC_DRAW_CYAN;
408   for (i=0,row=0; i<mbs; i++,row+=bs) {
409     for (j=a->i[i]; j<a->i[i+1]; j++) {
410       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
411       x_l = a->j[j]*bs; x_r = x_l + 1.0;
412       aa = a->a + j*bs2;
413       for (k=0; k<bs; k++) {
414         for (l=0; l<bs; l++) {
415           if (PetscRealPart(*aa++) != 0.) continue;
416           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
417         }
418       }
419     }
420   }
421 
422   color = PETSC_DRAW_RED;
423   for (i=0,row=0; i<mbs; i++,row+=bs) {
424     for (j=a->i[i]; j<a->i[i+1]; j++) {
425       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
426       x_l = a->j[j]*bs; x_r = x_l + 1.0;
427       aa = a->a + j*bs2;
428       for (k=0; k<bs; k++) {
429         for (l=0; l<bs; l++) {
430           if (PetscRealPart(*aa++) <= 0.) continue;
431           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
441 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
442 {
443   int          ierr;
444   PetscReal    xl,yl,xr,yr,w,h;
445   PetscDraw    draw;
446   PetscTruth   isnull;
447 
448   PetscFunctionBegin;
449 
450   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
451   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
452 
453   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
454   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
455   xr += w;    yr += h;  xl = -w;     yl = -h;
456   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
457   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatView_SeqSBAIJ"
464 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
465 {
466   int        ierr;
467   PetscTruth iascii,isdraw;
468 
469   PetscFunctionBegin;
470   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
471   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
472   if (iascii){
473     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
474   } else if (isdraw) {
475     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
476   } else {
477     Mat B;
478     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
479     ierr = MatView(B,viewer);CHKERRQ(ierr);
480     ierr = MatDestroy(B);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
488 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
489 {
490   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
491   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
492   int        *ai = a->i,*ailen = a->ilen;
493   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
494   MatScalar  *ap,*aa = a->a,zero = 0.0;
495 
496   PetscFunctionBegin;
497   for (k=0; k<m; k++) { /* loop over rows */
498     row  = im[k]; brow = row/bs;
499     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
500     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
501     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
502     nrow = ailen[brow];
503     for (l=0; l<n; l++) { /* loop over columns */
504       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
505       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
506       col  = in[l] ;
507       bcol = col/bs;
508       cidx = col%bs;
509       ridx = row%bs;
510       high = nrow;
511       low  = 0; /* assume unsorted */
512       while (high-low > 5) {
513         t = (low+high)/2;
514         if (rp[t] > bcol) high = t;
515         else             low  = t;
516       }
517       for (i=low; i<high; i++) {
518         if (rp[i] > bcol) break;
519         if (rp[i] == bcol) {
520           *v++ = ap[bs2*i+bs*cidx+ridx];
521           goto finished;
522         }
523       }
524       *v++ = zero;
525       finished:;
526     }
527   }
528   PetscFunctionReturn(0);
529 }
530 
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
534 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
535 {
536   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
537   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
538   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
539   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
540   PetscTruth      roworiented=a->roworiented;
541   const MatScalar *value = v;
542   MatScalar       *ap,*aa = a->a,*bap;
543 
544   PetscFunctionBegin;
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (k=0; k<m; k++) { /* loop over added rows */
551     row  = im[k];
552     if (row < 0) continue;
553 #if defined(PETSC_USE_BOPT_g)
554     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
555 #endif
556     rp   = aj + ai[row];
557     ap   = aa + bs2*ai[row];
558     rmax = imax[row];
559     nrow = ailen[row];
560     low  = 0;
561     for (l=0; l<n; l++) { /* loop over added columns */
562       if (in[l] < 0) continue;
563       col = in[l];
564 #if defined(PETSC_USE_BOPT_g)
565       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
566 #endif
567       if (col < row) continue; /* ignore lower triangular block */
568       if (roworiented) {
569         value = v + k*(stepval+bs)*bs + l*bs;
570       } else {
571         value = v + l*(stepval+bs)*bs + k*bs;
572       }
573       if (!sorted) low = 0; high = nrow;
574       while (high-low > 7) {
575         t = (low+high)/2;
576         if (rp[t] > col) high = t;
577         else             low  = t;
578       }
579       for (i=low; i<high; i++) {
580         if (rp[i] > col) break;
581         if (rp[i] == col) {
582           bap  = ap +  bs2*i;
583           if (roworiented) {
584             if (is == ADD_VALUES) {
585               for (ii=0; ii<bs; ii++,value+=stepval) {
586                 for (jj=ii; jj<bs2; jj+=bs) {
587                   bap[jj] += *value++;
588                 }
589               }
590             } else {
591               for (ii=0; ii<bs; ii++,value+=stepval) {
592                 for (jj=ii; jj<bs2; jj+=bs) {
593                   bap[jj] = *value++;
594                 }
595               }
596             }
597           } else {
598             if (is == ADD_VALUES) {
599               for (ii=0; ii<bs; ii++,value+=stepval) {
600                 for (jj=0; jj<bs; jj++) {
601                   *bap++ += *value++;
602                 }
603               }
604             } else {
605               for (ii=0; ii<bs; ii++,value+=stepval) {
606                 for (jj=0; jj<bs; jj++) {
607                   *bap++  = *value++;
608                 }
609               }
610             }
611           }
612           goto noinsert2;
613         }
614       }
615       if (nonew == 1) goto noinsert2;
616       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
617       if (nrow >= rmax) {
618         /* there is no extra room in row, therefore enlarge */
619         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
620         MatScalar *new_a;
621 
622         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
623 
624         /* malloc new storage space */
625         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
626 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
627         new_j   = (int*)(new_a + bs2*new_nz);
628         new_i   = new_j + new_nz;
629 
630         /* copy over old data into new slots */
631         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
632         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
633         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
634         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
635         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
636         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
637         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
638         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
639         /* free up old matrix storage */
640         ierr = PetscFree(a->a);CHKERRQ(ierr);
641         if (!a->singlemalloc) {
642           ierr = PetscFree(a->i);CHKERRQ(ierr);
643           ierr = PetscFree(a->j);CHKERRQ(ierr);
644         }
645         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
646         a->singlemalloc = PETSC_TRUE;
647 
648         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
649         rmax = imax[row] = imax[row] + CHUNKSIZE;
650         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
651         a->maxnz += bs2*CHUNKSIZE;
652         a->reallocs++;
653         a->nz++;
654       }
655       N = nrow++ - 1;
656       /* shift up all the later entries in this row */
657       for (ii=N; ii>=i; ii--) {
658         rp[ii+1] = rp[ii];
659         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
660       }
661       if (N >= i) {
662         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
663       }
664       rp[i] = col;
665       bap   = ap +  bs2*i;
666       if (roworiented) {
667         for (ii=0; ii<bs; ii++,value+=stepval) {
668           for (jj=ii; jj<bs2; jj+=bs) {
669             bap[jj] = *value++;
670           }
671         }
672       } else {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=0; jj<bs; jj++) {
675             *bap++  = *value++;
676           }
677         }
678       }
679       noinsert2:;
680       low = i;
681     }
682     ailen[row] = nrow;
683   }
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
689 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
690 {
691   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
692   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
693   int        m = A->m,*ip,N,*ailen = a->ilen;
694   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
695   MatScalar  *aa = a->a,*ap;
696 
697   PetscFunctionBegin;
698   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
699 
700   if (m) rmax = ailen[0];
701   for (i=1; i<mbs; i++) {
702     /* move each row back by the amount of empty slots (fshift) before it*/
703     fshift += imax[i-1] - ailen[i-1];
704     rmax   = PetscMax(rmax,ailen[i]);
705     if (fshift) {
706       ip = aj + ai[i]; ap = aa + bs2*ai[i];
707       N = ailen[i];
708       for (j=0; j<N; j++) {
709         ip[j-fshift] = ip[j];
710         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
711       }
712     }
713     ai[i] = ai[i-1] + ailen[i-1];
714   }
715   if (mbs) {
716     fshift += imax[mbs-1] - ailen[mbs-1];
717     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
718   }
719   /* reset ilen and imax for each row */
720   for (i=0; i<mbs; i++) {
721     ailen[i] = imax[i] = ai[i+1] - ai[i];
722   }
723   a->nz = ai[mbs];
724 
725   /* diagonals may have moved, reset it */
726   if (a->diag) {
727     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
728   }
729   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
730            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
731   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
732            a->reallocs);
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
734   a->reallocs          = 0;
735   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
736 
737   PetscFunctionReturn(0);
738 }
739 
740 /*
741    This function returns an array of flags which indicate the locations of contiguous
742    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
743    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
744    Assume: sizes should be long enough to hold all the values.
745 */
746 #undef __FUNCT__
747 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
748 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
749 {
750   int        i,j,k,row;
751   PetscTruth flg;
752 
753   PetscFunctionBegin;
754   for (i=0,j=0; i<n; j++) {
755     row = idx[i];
756     if (row%bs!=0) { /* Not the begining of a block */
757       sizes[j] = 1;
758       i++;
759     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
760       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
761       i++;
762     } else { /* Begining of the block, so check if the complete block exists */
763       flg = PETSC_TRUE;
764       for (k=1; k<bs; k++) {
765         if (row+k != idx[i+k]) { /* break in the block */
766           flg = PETSC_FALSE;
767           break;
768         }
769       }
770       if (flg == PETSC_TRUE) { /* No break in the bs */
771         sizes[j] = bs;
772         i+= bs;
773       } else {
774         sizes[j] = 1;
775         i++;
776       }
777     }
778   }
779   *bs_max = j;
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
785 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
786 {
787   PetscFunctionBegin;
788   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
789 }
790 
791 /* Only add/insert a(i,j) with i<=j (blocks).
792    Any a(i,j) with i>j input by user is ingored.
793 */
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
797 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
798 {
799   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
800   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
801   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
802   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
803   int         ridx,cidx,bs2=a->bs2,ierr;
804   MatScalar   *ap,value,*aa=a->a,*bap;
805 
806   PetscFunctionBegin;
807 
808   for (k=0; k<m; k++) { /* loop over added rows */
809     row  = im[k];       /* row number */
810     brow = row/bs;      /* block row number */
811     if (row < 0) continue;
812 #if defined(PETSC_USE_BOPT_g)
813     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
814 #endif
815     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
816     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
817     rmax = imax[brow];  /* maximum space allocated for this row */
818     nrow = ailen[brow]; /* actual length of this row */
819     low  = 0;
820 
821     for (l=0; l<n; l++) { /* loop over added columns */
822       if (in[l] < 0) continue;
823 #if defined(PETSC_USE_BOPT_g)
824       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
825 #endif
826       col = in[l];
827       bcol = col/bs;              /* block col number */
828 
829       if (brow <= bcol){
830         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
831         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
832           /* element value a(k,l) */
833           if (roworiented) {
834             value = v[l + k*n];
835           } else {
836             value = v[k + l*m];
837           }
838 
839           /* move pointer bap to a(k,l) quickly and add/insert value */
840           if (!sorted) low = 0; high = nrow;
841           while (high-low > 7) {
842             t = (low+high)/2;
843             if (rp[t] > bcol) high = t;
844             else              low  = t;
845           }
846           for (i=low; i<high; i++) {
847             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
848             if (rp[i] > bcol) break;
849             if (rp[i] == bcol) {
850               bap  = ap +  bs2*i + bs*cidx + ridx;
851               if (is == ADD_VALUES) *bap += value;
852               else                  *bap  = value;
853               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
854               if (brow == bcol && ridx < cidx){
855                 bap  = ap +  bs2*i + bs*ridx + cidx;
856                 if (is == ADD_VALUES) *bap += value;
857                 else                  *bap  = value;
858               }
859               goto noinsert1;
860             }
861           }
862 
863       if (nonew == 1) goto noinsert1;
864       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
865       if (nrow >= rmax) {
866         /* there is no extra room in row, therefore enlarge */
867         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
868         MatScalar *new_a;
869 
870         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
871 
872         /* Malloc new storage space */
873         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
874         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
875         new_j = (int*)(new_a + bs2*new_nz);
876         new_i = new_j + new_nz;
877 
878         /* copy over old data into new slots */
879         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
880         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
881         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
882         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
883         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
884         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
885         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
887         /* free up old matrix storage */
888         ierr = PetscFree(a->a);CHKERRQ(ierr);
889         if (!a->singlemalloc) {
890           ierr = PetscFree(a->i);CHKERRQ(ierr);
891           ierr = PetscFree(a->j);CHKERRQ(ierr);
892         }
893         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
894         a->singlemalloc = PETSC_TRUE;
895 
896         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
897         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
898         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
899         a->maxnz += bs2*CHUNKSIZE;
900         a->reallocs++;
901         a->nz++;
902       }
903 
904       N = nrow++ - 1;
905       /* shift up all the later entries in this row */
906       for (ii=N; ii>=i; ii--) {
907         rp[ii+1] = rp[ii];
908         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
909       }
910       if (N>=i) {
911         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
912       }
913       rp[i]                      = bcol;
914       ap[bs2*i + bs*cidx + ridx] = value;
915       noinsert1:;
916       low = i;
917       /* } */
918         }
919       } /* end of if .. if..  */
920     }                     /* end of loop over added columns */
921     ailen[brow] = nrow;
922   }                       /* end of loop over added rows */
923 
924   PetscFunctionReturn(0);
925 }
926 
927 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
928 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
929 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
930 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
931 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
932 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
933 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
934 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
935 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
936 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
937 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
938 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
939 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
940 extern int MatZeroEntries_SeqSBAIJ(Mat);
941 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
942 
943 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
944 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
945 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
951 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
952 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
953 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
954 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
955 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
956 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
957 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
958 
959 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
960 
961 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
962 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
963 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
964 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
965 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
966 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
967 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
968 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
969 
970 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
971 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
978 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
979 
980 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
981 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
982 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
983 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
984 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
985 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
986 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
987 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
988 
989 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
990 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
991 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
992 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
993 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
994 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
995 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
996 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
997 
998 #ifdef HAVE_MatICCFactor
999 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1002 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1003 {
1004   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1005   Mat         outA;
1006   int         ierr;
1007   PetscTruth  row_identity,col_identity;
1008 
1009   PetscFunctionBegin;
1010   /*
1011   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1012   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1013   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1014   if (!row_identity || !col_identity) {
1015     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1016   }
1017   */
1018 
1019   outA          = inA;
1020   inA->factor   = FACTOR_CHOLESKY;
1021 
1022   if (!a->diag) {
1023     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1024   }
1025   /*
1026       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1027       for ILU(0) factorization with natural ordering
1028   */
1029   switch (a->bs) {
1030   case 1:
1031     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1032     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1033     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1034     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1035   case 2:
1036     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1037     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1038     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1039     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1040     break;
1041   case 3:
1042     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1043     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1044     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1045     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1046     break;
1047   case 4:
1048     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1049     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1050     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1051     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1052     break;
1053   case 5:
1054     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1055     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1056     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1057     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1058     break;
1059   case 6:
1060     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1061     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1062     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1063     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1064     break;
1065   case 7:
1066     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1067     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1068     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1069     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1070     break;
1071   default:
1072     a->row        = row;
1073     a->icol       = col;
1074     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1075     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1076 
1077     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1078     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1079     PetscLogObjectParent(inA,a->icol);
1080 
1081     if (!a->solve_work) {
1082       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1083       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1084     }
1085   }
1086 
1087   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1088 
1089   PetscFunctionReturn(0);
1090 }
1091 #endif
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1095 int MatPrintHelp_SeqSBAIJ(Mat A)
1096 {
1097   static PetscTruth called = PETSC_FALSE;
1098   MPI_Comm          comm = A->comm;
1099   int               ierr;
1100 
1101   PetscFunctionBegin;
1102   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1103   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1104   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 EXTERN_C_BEGIN
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1111 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1112 {
1113   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1114   int         i,nz,n;
1115 
1116   PetscFunctionBegin;
1117   nz = baij->maxnz;
1118   n  = mat->n;
1119   for (i=0; i<nz; i++) {
1120     baij->j[i] = indices[i];
1121   }
1122   baij->nz = nz;
1123   for (i=0; i<n; i++) {
1124     baij->ilen[i] = baij->imax[i];
1125   }
1126 
1127   PetscFunctionReturn(0);
1128 }
1129 EXTERN_C_END
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1133 /*@
1134     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1135        in the matrix.
1136 
1137   Input Parameters:
1138 +  mat     - the SeqSBAIJ matrix
1139 -  indices - the column indices
1140 
1141   Level: advanced
1142 
1143   Notes:
1144     This can be called if you have precomputed the nonzero structure of the
1145   matrix and want to provide it to the matrix object to improve the performance
1146   of the MatSetValues() operation.
1147 
1148     You MUST have set the correct numbers of nonzeros per row in the call to
1149   MatCreateSeqSBAIJ().
1150 
1151     MUST be called before any calls to MatSetValues()
1152 
1153 .seealso: MatCreateSeqSBAIJ
1154 @*/
1155 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1156 {
1157   int ierr,(*f)(Mat,int *);
1158 
1159   PetscFunctionBegin;
1160   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1161   PetscValidPointer(indices,2);
1162   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1163   if (f) {
1164     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1165   } else {
1166     SETERRQ(1,"Wrong type of matrix to set column indices");
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1173 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1174 {
1175   int        ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1184 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1185 {
1186   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1187   PetscFunctionBegin;
1188   *array = a->a;
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1194 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1195 {
1196   PetscFunctionBegin;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #include "petscblaslapack.h"
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1203 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1204 {
1205   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1206   int          ierr,one=1,i,bs=y->bs,bs2,j;
1207 
1208   PetscFunctionBegin;
1209   if (str == SAME_NONZERO_PATTERN) {
1210     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1211   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1212     if (y->xtoy && y->XtoY != X) {
1213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1214       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1215     }
1216     if (!y->xtoy) { /* get xtoy */
1217       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1218       y->XtoY = X;
1219     }
1220     bs2 = bs*bs;
1221     for (i=0; i<x->nz; i++) {
1222       j = 0;
1223       while (j < bs2){
1224         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1225         j++;
1226       }
1227     }
1228     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1229   } else {
1230     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1237 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1238 {
1239   PetscFunctionBegin;
1240   *flg = PETSC_TRUE;
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1246 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1247 {
1248   PetscFunctionBegin;
1249   *flg = PETSC_TRUE;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1255 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1256 {
1257   PetscFunctionBegin;
1258   *flg = PETSC_FALSE;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 /* -------------------------------------------------------------------*/
1263 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1264        MatGetRow_SeqSBAIJ,
1265        MatRestoreRow_SeqSBAIJ,
1266        MatMult_SeqSBAIJ_N,
1267 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1268        MatMultTranspose_SeqSBAIJ,
1269        MatMultTransposeAdd_SeqSBAIJ,
1270        MatSolve_SeqSBAIJ_N,
1271        0,
1272        0,
1273 /*10*/ 0,
1274        0,
1275        MatCholeskyFactor_SeqSBAIJ,
1276        MatRelax_SeqSBAIJ,
1277        MatTranspose_SeqSBAIJ,
1278 /*15*/ MatGetInfo_SeqSBAIJ,
1279        MatEqual_SeqSBAIJ,
1280        MatGetDiagonal_SeqSBAIJ,
1281        MatDiagonalScale_SeqSBAIJ,
1282        MatNorm_SeqSBAIJ,
1283 /*20*/ 0,
1284        MatAssemblyEnd_SeqSBAIJ,
1285        0,
1286        MatSetOption_SeqSBAIJ,
1287        MatZeroEntries_SeqSBAIJ,
1288 /*25*/ MatZeroRows_SeqSBAIJ,
1289        0,
1290        0,
1291        MatCholeskyFactorSymbolic_SeqSBAIJ,
1292        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1293 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1294        0,
1295        MatICCFactorSymbolic_SeqSBAIJ,
1296        MatGetArray_SeqSBAIJ,
1297        MatRestoreArray_SeqSBAIJ,
1298 /*35*/ MatDuplicate_SeqSBAIJ,
1299        0,
1300        0,
1301        0,
1302        0,
1303 /*40*/ MatAXPY_SeqSBAIJ,
1304        MatGetSubMatrices_SeqSBAIJ,
1305        MatIncreaseOverlap_SeqSBAIJ,
1306        MatGetValues_SeqSBAIJ,
1307        0,
1308 /*45*/ MatPrintHelp_SeqSBAIJ,
1309        MatScale_SeqSBAIJ,
1310        0,
1311        0,
1312        0,
1313 /*50*/ MatGetBlockSize_SeqSBAIJ,
1314        MatGetRowIJ_SeqSBAIJ,
1315        MatRestoreRowIJ_SeqSBAIJ,
1316        0,
1317        0,
1318 /*55*/ 0,
1319        0,
1320        0,
1321        0,
1322        MatSetValuesBlocked_SeqSBAIJ,
1323 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1324        0,
1325        0,
1326        MatGetPetscMaps_Petsc,
1327        0,
1328 /*65*/ 0,
1329        0,
1330        0,
1331        0,
1332        0,
1333 /*70*/ MatGetRowMax_SeqSBAIJ,
1334        0,
1335        0,
1336        0,
1337        0,
1338 /*75*/ 0,
1339        0,
1340        0,
1341        0,
1342        0,
1343 /*80*/ 0,
1344        0,
1345        0,
1346 #if !defined(PETSC_USE_COMPLEX)
1347        MatGetInertia_SeqSBAIJ,
1348 #else
1349        0,
1350 #endif
1351 /*85*/ MatLoad_SeqSBAIJ,
1352        MatIsSymmetric_SeqSBAIJ,
1353        MatIsStructurallySymmetric_SeqSBAIJ,
1354        MatIsHermitian_SeqSBAIJ
1355 };
1356 
1357 EXTERN_C_BEGIN
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1360 int MatStoreValues_SeqSBAIJ(Mat mat)
1361 {
1362   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1363   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1364   int         ierr;
1365 
1366   PetscFunctionBegin;
1367   if (aij->nonew != 1) {
1368     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1369   }
1370 
1371   /* allocate space for values if not already there */
1372   if (!aij->saved_values) {
1373     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1374   }
1375 
1376   /* copy values over */
1377   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 EXTERN_C_END
1381 
1382 EXTERN_C_BEGIN
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1385 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1386 {
1387   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1388   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1389 
1390   PetscFunctionBegin;
1391   if (aij->nonew != 1) {
1392     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1393   }
1394   if (!aij->saved_values) {
1395     SETERRQ(1,"Must call MatStoreValues(A);first");
1396   }
1397 
1398   /* copy values over */
1399   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 EXTERN_C_END
1403 
1404 EXTERN_C_BEGIN
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1407 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1408 {
1409   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1410   int          i,len,ierr,mbs,bs2;
1411   PetscTruth   flg;
1412 
1413   PetscFunctionBegin;
1414   B->preallocated = PETSC_TRUE;
1415   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1416   mbs  = B->m/bs;
1417   bs2  = bs*bs;
1418 
1419   if (mbs*bs != B->m) {
1420     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1421   }
1422 
1423   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1424   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1425   if (nnz) {
1426     for (i=0; i<mbs; i++) {
1427       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1428       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1429     }
1430   }
1431 
1432   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1433   if (!flg) {
1434     switch (bs) {
1435     case 1:
1436       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1437       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1438       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1439       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1440       B->ops->mult            = MatMult_SeqSBAIJ_1;
1441       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1442       break;
1443     case 2:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1445       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1446       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1447       B->ops->mult            = MatMult_SeqSBAIJ_2;
1448       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1449       break;
1450     case 3:
1451       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1452       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1453       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1454       B->ops->mult            = MatMult_SeqSBAIJ_3;
1455       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1456       break;
1457     case 4:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1459       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1460       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1461       B->ops->mult            = MatMult_SeqSBAIJ_4;
1462       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1463       break;
1464     case 5:
1465       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1466       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1467       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1468       B->ops->mult            = MatMult_SeqSBAIJ_5;
1469       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1470       break;
1471     case 6:
1472       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1473       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1474       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1475       B->ops->mult            = MatMult_SeqSBAIJ_6;
1476       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1477       break;
1478     case 7:
1479       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1480       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1481       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1482       B->ops->mult            = MatMult_SeqSBAIJ_7;
1483       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1484       break;
1485     }
1486   }
1487 
1488   b->mbs = mbs;
1489   b->nbs = mbs;
1490   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1491   if (!nnz) {
1492     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1493     else if (nz <= 0)        nz = 1;
1494     for (i=0; i<mbs; i++) {
1495       b->imax[i] = nz;
1496     }
1497     nz = nz*mbs; /* total nz */
1498   } else {
1499     nz = 0;
1500     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1501   }
1502   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1503 
1504   /* allocate the matrix space */
1505   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1506   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1507   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1508   b->j = (int*)(b->a + nz*bs2);
1509   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1510   b->i = b->j + nz;
1511   b->singlemalloc = PETSC_TRUE;
1512 
1513   /* pointer to beginning of each row */
1514   b->i[0] = 0;
1515   for (i=1; i<mbs+1; i++) {
1516     b->i[i] = b->i[i-1] + b->imax[i-1];
1517   }
1518 
1519   /* b->ilen will count nonzeros in each block row so far. */
1520   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1521   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1522   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1523 
1524   b->bs               = bs;
1525   b->bs2              = bs2;
1526   b->nz             = 0;
1527   b->maxnz          = nz*bs2;
1528 
1529   b->inew             = 0;
1530   b->jnew             = 0;
1531   b->anew             = 0;
1532   b->a2anew           = 0;
1533   b->permute          = PETSC_FALSE;
1534   PetscFunctionReturn(0);
1535 }
1536 EXTERN_C_END
1537 
1538 EXTERN_C_BEGIN
1539 EXTERN int MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1540 EXTERN int MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1541 EXTERN_C_END
1542 
1543 /*MC
1544    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1545    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1546 
1547    Options Database Keys:
1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1549 
1550   Level: beginner
1551 
1552 .seealso: MatCreateSeqSBAIJ
1553 M*/
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1558 int MatCreate_SeqSBAIJ(Mat B)
1559 {
1560   Mat_SeqSBAIJ *b;
1561   int          ierr,size;
1562 
1563   PetscFunctionBegin;
1564   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1565   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1566   B->m = B->M = PetscMax(B->m,B->M);
1567   B->n = B->N = PetscMax(B->n,B->N);
1568 
1569   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1570   B->data = (void*)b;
1571   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1572   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1573   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1574   B->ops->view        = MatView_SeqSBAIJ;
1575   B->factor           = 0;
1576   B->lupivotthreshold = 1.0;
1577   B->mapping          = 0;
1578   b->row              = 0;
1579   b->icol             = 0;
1580   b->reallocs         = 0;
1581   b->saved_values     = 0;
1582 
1583   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1584   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1585 
1586   b->sorted           = PETSC_FALSE;
1587   b->roworiented      = PETSC_TRUE;
1588   b->nonew            = 0;
1589   b->diag             = 0;
1590   b->solve_work       = 0;
1591   b->mult_work        = 0;
1592   B->spptr            = 0;
1593   b->keepzeroedrows   = PETSC_FALSE;
1594   b->xtoy             = 0;
1595   b->XtoY             = 0;
1596 
1597   b->inew             = 0;
1598   b->jnew             = 0;
1599   b->anew             = 0;
1600   b->a2anew           = 0;
1601   b->permute          = PETSC_FALSE;
1602 
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604                                      "MatStoreValues_SeqSBAIJ",
1605                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607                                      "MatRetrieveValues_SeqSBAIJ",
1608                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1610                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1611                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1613                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1614                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1616                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1617                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1619                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1620                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1621 
1622   B->symmetric                  = PETSC_TRUE;
1623   B->structurally_symmetric     = PETSC_TRUE;
1624   B->symmetric_set              = PETSC_TRUE;
1625   B->structurally_symmetric_set = PETSC_TRUE;
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1632 /*@C
1633    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1634    compressed row) format.  For good matrix assembly performance the
1635    user should preallocate the matrix storage by setting the parameter nz
1636    (or the array nnz).  By setting these parameters accurately, performance
1637    during matrix assembly can be increased by more than a factor of 50.
1638 
1639    Collective on Mat
1640 
1641    Input Parameters:
1642 +  A - the symmetric matrix
1643 .  bs - size of block
1644 .  nz - number of block nonzeros per block row (same for all rows)
1645 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1646          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1647 
1648    Options Database Keys:
1649 .   -mat_no_unroll - uses code that does not unroll the loops in the
1650                      block calculations (much slower)
1651 .    -mat_block_size - size of the blocks to use
1652 
1653    Level: intermediate
1654 
1655    Notes:
1656    Specify the preallocated storage with either nz or nnz (not both).
1657    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1658    allocation.  For additional details, see the users manual chapter on
1659    matrices.
1660 
1661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1662 @*/
1663 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1664   int ierr,(*f)(Mat,int,int,const int[]);
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1668   if (f) {
1669     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatCreateSeqSBAIJ"
1676 /*@C
1677    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1678    compressed row) format.  For good matrix assembly performance the
1679    user should preallocate the matrix storage by setting the parameter nz
1680    (or the array nnz).  By setting these parameters accurately, performance
1681    during matrix assembly can be increased by more than a factor of 50.
1682 
1683    Collective on MPI_Comm
1684 
1685    Input Parameters:
1686 +  comm - MPI communicator, set to PETSC_COMM_SELF
1687 .  bs - size of block
1688 .  m - number of rows, or number of columns
1689 .  nz - number of block nonzeros per block row (same for all rows)
1690 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1691          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1692 
1693    Output Parameter:
1694 .  A - the symmetric matrix
1695 
1696    Options Database Keys:
1697 .   -mat_no_unroll - uses code that does not unroll the loops in the
1698                      block calculations (much slower)
1699 .    -mat_block_size - size of the blocks to use
1700 
1701    Level: intermediate
1702 
1703    Notes:
1704 
1705    Specify the preallocated storage with either nz or nnz (not both).
1706    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1707    allocation.  For additional details, see the users manual chapter on
1708    matrices.
1709 
1710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1711 @*/
1712 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1713 {
1714   int ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1718   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1719   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1725 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1726 {
1727   Mat          C;
1728   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1729   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1730 
1731   PetscFunctionBegin;
1732   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1733 
1734   *B = 0;
1735   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1736   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1737   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1738   c    = (Mat_SeqSBAIJ*)C->data;
1739 
1740   C->preallocated   = PETSC_TRUE;
1741   C->factor         = A->factor;
1742   c->row            = 0;
1743   c->icol           = 0;
1744   c->saved_values   = 0;
1745   c->keepzeroedrows = a->keepzeroedrows;
1746   C->assembled      = PETSC_TRUE;
1747 
1748   C->M    = A->M;
1749   C->N    = A->N;
1750   c->bs   = a->bs;
1751   c->bs2  = a->bs2;
1752   c->mbs  = a->mbs;
1753   c->nbs  = a->nbs;
1754 
1755   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1756   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1757   for (i=0; i<mbs; i++) {
1758     c->imax[i] = a->imax[i];
1759     c->ilen[i] = a->ilen[i];
1760   }
1761 
1762   /* allocate the matrix space */
1763   c->singlemalloc = PETSC_TRUE;
1764   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1765   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1766   c->j = (int*)(c->a + nz*bs2);
1767   c->i = c->j + nz;
1768   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1769   if (mbs > 0) {
1770     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1771     if (cpvalues == MAT_COPY_VALUES) {
1772       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1773     } else {
1774       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1775     }
1776   }
1777 
1778   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1779   c->sorted      = a->sorted;
1780   c->roworiented = a->roworiented;
1781   c->nonew       = a->nonew;
1782 
1783   if (a->diag) {
1784     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1785     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1786     for (i=0; i<mbs; i++) {
1787       c->diag[i] = a->diag[i];
1788     }
1789   } else c->diag        = 0;
1790   c->nz               = a->nz;
1791   c->maxnz            = a->maxnz;
1792   c->solve_work         = 0;
1793   c->mult_work          = 0;
1794   *B = C;
1795   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1801 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1802 {
1803   Mat_SeqSBAIJ *a;
1804   Mat          B;
1805   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1806   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1807   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1808   int          *masked,nmask,tmp,bs2,ishift;
1809   PetscScalar  *aa;
1810   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1811 
1812   PetscFunctionBegin;
1813   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1814   bs2  = bs*bs;
1815 
1816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1817   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1818   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1819   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1820   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1821   M = header[1]; N = header[2]; nz = header[3];
1822 
1823   if (header[3] < 0) {
1824     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1825   }
1826 
1827   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1828 
1829   /*
1830      This code adds extra rows to make sure the number of rows is
1831     divisible by the blocksize
1832   */
1833   mbs        = M/bs;
1834   extra_rows = bs - M + bs*(mbs);
1835   if (extra_rows == bs) extra_rows = 0;
1836   else                  mbs++;
1837   if (extra_rows) {
1838     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1839   }
1840 
1841   /* read in row lengths */
1842   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1843   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1844   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1845 
1846   /* read in column indices */
1847   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1848   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1849   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1850 
1851   /* loop over row lengths determining block row lengths */
1852   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1853   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1854   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1855   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1856   masked   = mask + mbs;
1857   rowcount = 0; nzcount = 0;
1858   for (i=0; i<mbs; i++) {
1859     nmask = 0;
1860     for (j=0; j<bs; j++) {
1861       kmax = rowlengths[rowcount];
1862       for (k=0; k<kmax; k++) {
1863         tmp = jj[nzcount++]/bs;   /* block col. index */
1864         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1865       }
1866       rowcount++;
1867     }
1868     s_browlengths[i] += nmask;
1869 
1870     /* zero out the mask elements we set */
1871     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1872   }
1873 
1874   /* create our matrix */
1875   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1876   ierr = MatSetType(B,type);CHKERRQ(ierr);
1877   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1878   a = (Mat_SeqSBAIJ*)B->data;
1879 
1880   /* set matrix "i" values */
1881   a->i[0] = 0;
1882   for (i=1; i<= mbs; i++) {
1883     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1884     a->ilen[i-1] = s_browlengths[i-1];
1885   }
1886   a->nz = a->i[mbs];
1887 
1888   /* read in nonzero values */
1889   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1890   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1891   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1892 
1893   /* set "a" and "j" values into matrix */
1894   nzcount = 0; jcount = 0;
1895   for (i=0; i<mbs; i++) {
1896     nzcountb = nzcount;
1897     nmask    = 0;
1898     for (j=0; j<bs; j++) {
1899       kmax = rowlengths[i*bs+j];
1900       for (k=0; k<kmax; k++) {
1901         tmp = jj[nzcount++]/bs; /* block col. index */
1902         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1903       }
1904     }
1905     /* sort the masked values */
1906     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1907 
1908     /* set "j" values into matrix */
1909     maskcount = 1;
1910     for (j=0; j<nmask; j++) {
1911       a->j[jcount++]  = masked[j];
1912       mask[masked[j]] = maskcount++;
1913     }
1914 
1915     /* set "a" values into matrix */
1916     ishift = bs2*a->i[i];
1917     for (j=0; j<bs; j++) {
1918       kmax = rowlengths[i*bs+j];
1919       for (k=0; k<kmax; k++) {
1920         tmp       = jj[nzcountb]/bs ; /* block col. index */
1921         if (tmp >= i){
1922           block     = mask[tmp] - 1;
1923           point     = jj[nzcountb] - bs*tmp;
1924           idx       = ishift + bs2*block + j + bs*point;
1925           a->a[idx] = aa[nzcountb];
1926         }
1927         nzcountb++;
1928       }
1929     }
1930     /* zero out the mask elements we set */
1931     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1932   }
1933   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1934 
1935   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1936   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1937   ierr = PetscFree(aa);CHKERRQ(ierr);
1938   ierr = PetscFree(jj);CHKERRQ(ierr);
1939   ierr = PetscFree(mask);CHKERRQ(ierr);
1940 
1941   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1942   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   ierr = MatView_Private(B);CHKERRQ(ierr);
1944   *A = B;
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1950 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1951 {
1952   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1953   MatScalar    *aa=a->a,*v,*v1;
1954   PetscScalar  *x,*b,*t,sum,d;
1955   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1956   int          nz,nz1,*vj,*vj1,i;
1957 
1958   PetscFunctionBegin;
1959   its = its*lits;
1960   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1961 
1962   if (bs > 1)
1963     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1964 
1965   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1966   if (xx != bb) {
1967     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1968   } else {
1969     b = x;
1970   }
1971 
1972   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1973 
1974   if (flag & SOR_ZERO_INITIAL_GUESS) {
1975     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1976       for (i=0; i<m; i++)
1977         t[i] = b[i];
1978 
1979       for (i=0; i<m; i++){
1980         d  = *(aa + ai[i]);  /* diag[i] */
1981         v  = aa + ai[i] + 1;
1982         vj = aj + ai[i] + 1;
1983         nz = ai[i+1] - ai[i] - 1;
1984         x[i] = omega*t[i]/d;
1985         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1986         PetscLogFlops(2*nz-1);
1987       }
1988     }
1989 
1990     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1991       for (i=0; i<m; i++)
1992         t[i] = b[i];
1993 
1994       for (i=0; i<m-1; i++){  /* update rhs */
1995         v  = aa + ai[i] + 1;
1996         vj = aj + ai[i] + 1;
1997         nz = ai[i+1] - ai[i] - 1;
1998         while (nz--) t[*vj++] -= x[i]*(*v++);
1999         PetscLogFlops(2*nz-1);
2000       }
2001       for (i=m-1; i>=0; i--){
2002         d  = *(aa + ai[i]);
2003         v  = aa + ai[i] + 1;
2004         vj = aj + ai[i] + 1;
2005         nz = ai[i+1] - ai[i] - 1;
2006         sum = t[i];
2007         while (nz--) sum -= x[*vj++]*(*v++);
2008         PetscLogFlops(2*nz-1);
2009         x[i] =   (1-omega)*x[i] + omega*sum/d;
2010       }
2011     }
2012     its--;
2013   }
2014 
2015   while (its--) {
2016     /*
2017        forward sweep:
2018        for i=0,...,m-1:
2019          sum[i] = (b[i] - U(i,:)x )/d[i];
2020          x[i]   = (1-omega)x[i] + omega*sum[i];
2021          b      = b - x[i]*U^T(i,:);
2022 
2023     */
2024     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2025       for (i=0; i<m; i++)
2026         t[i] = b[i];
2027 
2028       for (i=0; i<m; i++){
2029         d  = *(aa + ai[i]);  /* diag[i] */
2030         v  = aa + ai[i] + 1; v1=v;
2031         vj = aj + ai[i] + 1; vj1=vj;
2032         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2033         sum = t[i];
2034         while (nz1--) sum -= (*v1++)*x[*vj1++];
2035         x[i] = (1-omega)*x[i] + omega*sum/d;
2036         while (nz--) t[*vj++] -= x[i]*(*v++);
2037         PetscLogFlops(4*nz-2);
2038       }
2039     }
2040 
2041   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2042       /*
2043        backward sweep:
2044        b = b - x[i]*U^T(i,:), i=0,...,n-2
2045        for i=m-1,...,0:
2046          sum[i] = (b[i] - U(i,:)x )/d[i];
2047          x[i]   = (1-omega)x[i] + omega*sum[i];
2048       */
2049       for (i=0; i<m; i++)
2050         t[i] = b[i];
2051 
2052       for (i=0; i<m-1; i++){  /* update rhs */
2053         v  = aa + ai[i] + 1;
2054         vj = aj + ai[i] + 1;
2055         nz = ai[i+1] - ai[i] - 1;
2056         while (nz--) t[*vj++] -= x[i]*(*v++);
2057         PetscLogFlops(2*nz-1);
2058       }
2059       for (i=m-1; i>=0; i--){
2060         d  = *(aa + ai[i]);
2061         v  = aa + ai[i] + 1;
2062         vj = aj + ai[i] + 1;
2063         nz = ai[i+1] - ai[i] - 1;
2064         sum = t[i];
2065         while (nz--) sum -= x[*vj++]*(*v++);
2066         PetscLogFlops(2*nz-1);
2067         x[i] =   (1-omega)*x[i] + omega*sum/d;
2068       }
2069     }
2070   }
2071 
2072   ierr = PetscFree(t);CHKERRQ(ierr);
2073   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2074   if (bb != xx) {
2075     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2076   }
2077 
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 
2082 
2083 
2084 
2085 
2086