xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision c66d48e9ed6f4bcdb337a579d892af16f451b500)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
294 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
297   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
298   char              *name;
299   PetscViewerFormat format;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
303   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
304   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
305     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
306   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
307     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
308   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
309     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
310     for (i=0; i<a->mbs; i++) {
311       for (j=0; j<bs; j++) {
312         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
313         for (k=a->i[i]; k<a->i[i+1]; k++) {
314           for (l=0; l<bs; l++) {
315 #if defined(PETSC_USE_COMPLEX)
316             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
317               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
318                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
319             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
320               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
321                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
322             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             }
325 #else
326             if (a->a[bs2*k + l*bs + j] != 0.0) {
327               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
328             }
329 #endif
330           }
331         }
332         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
333       }
334     }
335     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
336   } else {
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
338     for (i=0; i<a->mbs; i++) {
339       for (j=0; j<bs; j++) {
340         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
341         for (k=a->i[i]; k<a->i[i+1]; k++) {
342           for (l=0; l<bs; l++) {
343 #if defined(PETSC_USE_COMPLEX)
344             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
346                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
348               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
349                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
350             } else {
351               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             }
353 #else
354             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
355 #endif
356           }
357         }
358         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
359       }
360     }
361     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
362   }
363   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
364   PetscFunctionReturn(0);
365 }
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
369 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
370 {
371   Mat          A = (Mat) Aa;
372   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
373   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
374   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
375   MatScalar    *aa;
376   MPI_Comm     comm;
377   PetscViewer  viewer;
378 
379   PetscFunctionBegin;
380   /*
381       This is nasty. If this is called from an originally parallel matrix
382    then all processes call this,but only the first has the matrix so the
383    rest should return immediately.
384   */
385   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
386   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
387   if (rank) PetscFunctionReturn(0);
388 
389   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
390 
391   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
392   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
393 
394   /* loop over matrix elements drawing boxes */
395   color = PETSC_DRAW_BLUE;
396   for (i=0,row=0; i<mbs; i++,row+=bs) {
397     for (j=a->i[i]; j<a->i[i+1]; j++) {
398       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
399       x_l = a->j[j]*bs; x_r = x_l + 1.0;
400       aa = a->a + j*bs2;
401       for (k=0; k<bs; k++) {
402         for (l=0; l<bs; l++) {
403           if (PetscRealPart(*aa++) >=  0.) continue;
404           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
405         }
406       }
407     }
408   }
409   color = PETSC_DRAW_CYAN;
410   for (i=0,row=0; i<mbs; i++,row+=bs) {
411     for (j=a->i[i]; j<a->i[i+1]; j++) {
412       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
413       x_l = a->j[j]*bs; x_r = x_l + 1.0;
414       aa = a->a + j*bs2;
415       for (k=0; k<bs; k++) {
416         for (l=0; l<bs; l++) {
417           if (PetscRealPart(*aa++) != 0.) continue;
418           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
419         }
420       }
421     }
422   }
423 
424   color = PETSC_DRAW_RED;
425   for (i=0,row=0; i<mbs; i++,row+=bs) {
426     for (j=a->i[i]; j<a->i[i+1]; j++) {
427       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
428       x_l = a->j[j]*bs; x_r = x_l + 1.0;
429       aa = a->a + j*bs2;
430       for (k=0; k<bs; k++) {
431         for (l=0; l<bs; l++) {
432           if (PetscRealPart(*aa++) <= 0.) continue;
433           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
434         }
435       }
436     }
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
443 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
444 {
445   int          ierr;
446   PetscReal    xl,yl,xr,yr,w,h;
447   PetscDraw    draw;
448   PetscTruth   isnull;
449 
450   PetscFunctionBegin;
451 
452   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
453   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
454 
455   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
456   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
457   xr += w;    yr += h;  xl = -w;     yl = -h;
458   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
459   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
460   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatView_SeqSBAIJ"
466 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
467 {
468   int        ierr;
469   PetscTruth isascii,isdraw;
470 
471   PetscFunctionBegin;
472   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
473   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
474   if (isascii){
475     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
476   } else if (isdraw) {
477     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
478   } else {
479     Mat B;
480     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
481     ierr = MatView(B,viewer);CHKERRQ(ierr);
482     ierr = MatDestroy(B);CHKERRQ(ierr);
483   }
484   PetscFunctionReturn(0);
485 }
486 
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
490 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
491 {
492   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
493   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
494   int        *ai = a->i,*ailen = a->ilen;
495   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
496   MatScalar  *ap,*aa = a->a,zero = 0.0;
497 
498   PetscFunctionBegin;
499   for (k=0; k<m; k++) { /* loop over rows */
500     row  = im[k]; brow = row/bs;
501     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
502     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
503     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
504     nrow = ailen[brow];
505     for (l=0; l<n; l++) { /* loop over columns */
506       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
507       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
508       col  = in[l] ;
509       bcol = col/bs;
510       cidx = col%bs;
511       ridx = row%bs;
512       high = nrow;
513       low  = 0; /* assume unsorted */
514       while (high-low > 5) {
515         t = (low+high)/2;
516         if (rp[t] > bcol) high = t;
517         else             low  = t;
518       }
519       for (i=low; i<high; i++) {
520         if (rp[i] > bcol) break;
521         if (rp[i] == bcol) {
522           *v++ = ap[bs2*i+bs*cidx+ridx];
523           goto finished;
524         }
525       }
526       *v++ = zero;
527       finished:;
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
536 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
537 {
538   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
539   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
540   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
541   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
542   PetscTruth      roworiented=a->roworiented;
543   const MatScalar *value = v;
544   MatScalar       *ap,*aa = a->a,*bap;
545 
546   PetscFunctionBegin;
547   if (roworiented) {
548     stepval = (n-1)*bs;
549   } else {
550     stepval = (m-1)*bs;
551   }
552   for (k=0; k<m; k++) { /* loop over added rows */
553     row  = im[k];
554     if (row < 0) continue;
555 #if defined(PETSC_USE_BOPT_g)
556     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
557 #endif
558     rp   = aj + ai[row];
559     ap   = aa + bs2*ai[row];
560     rmax = imax[row];
561     nrow = ailen[row];
562     low  = 0;
563     for (l=0; l<n; l++) { /* loop over added columns */
564       if (in[l] < 0) continue;
565       col = in[l];
566 #if defined(PETSC_USE_BOPT_g)
567       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
568 #endif
569       if (col < row) continue; /* ignore lower triangular block */
570       if (roworiented) {
571         value = v + k*(stepval+bs)*bs + l*bs;
572       } else {
573         value = v + l*(stepval+bs)*bs + k*bs;
574       }
575       if (!sorted) low = 0; high = nrow;
576       while (high-low > 7) {
577         t = (low+high)/2;
578         if (rp[t] > col) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > col) break;
583         if (rp[i] == col) {
584           bap  = ap +  bs2*i;
585           if (roworiented) {
586             if (is == ADD_VALUES) {
587               for (ii=0; ii<bs; ii++,value+=stepval) {
588                 for (jj=ii; jj<bs2; jj+=bs) {
589                   bap[jj] += *value++;
590                 }
591               }
592             } else {
593               for (ii=0; ii<bs; ii++,value+=stepval) {
594                 for (jj=ii; jj<bs2; jj+=bs) {
595                   bap[jj] = *value++;
596                 }
597               }
598             }
599           } else {
600             if (is == ADD_VALUES) {
601               for (ii=0; ii<bs; ii++,value+=stepval) {
602                 for (jj=0; jj<bs; jj++) {
603                   *bap++ += *value++;
604                 }
605               }
606             } else {
607               for (ii=0; ii<bs; ii++,value+=stepval) {
608                 for (jj=0; jj<bs; jj++) {
609                   *bap++  = *value++;
610                 }
611               }
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       if (nonew == 1) goto noinsert2;
618       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
619       if (nrow >= rmax) {
620         /* there is no extra room in row, therefore enlarge */
621         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
622         MatScalar *new_a;
623 
624         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
625 
626         /* malloc new storage space */
627         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
628 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
629         new_j   = (int*)(new_a + bs2*new_nz);
630         new_i   = new_j + new_nz;
631 
632         /* copy over old data into new slots */
633         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
634         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
635         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
636         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
637         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
638         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
640         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
641         /* free up old matrix storage */
642         ierr = PetscFree(a->a);CHKERRQ(ierr);
643         if (!a->singlemalloc) {
644           ierr = PetscFree(a->i);CHKERRQ(ierr);
645           ierr = PetscFree(a->j);CHKERRQ(ierr);
646         }
647         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
648         a->singlemalloc = PETSC_TRUE;
649 
650         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
651         rmax = imax[row] = imax[row] + CHUNKSIZE;
652         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
653         a->maxnz += bs2*CHUNKSIZE;
654         a->reallocs++;
655         a->nz++;
656       }
657       N = nrow++ - 1;
658       /* shift up all the later entries in this row */
659       for (ii=N; ii>=i; ii--) {
660         rp[ii+1] = rp[ii];
661         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
662       }
663       if (N >= i) {
664         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
665       }
666       rp[i] = col;
667       bap   = ap +  bs2*i;
668       if (roworiented) {
669         for (ii=0; ii<bs; ii++,value+=stepval) {
670           for (jj=ii; jj<bs2; jj+=bs) {
671             bap[jj] = *value++;
672           }
673         }
674       } else {
675         for (ii=0; ii<bs; ii++,value+=stepval) {
676           for (jj=0; jj<bs; jj++) {
677             *bap++  = *value++;
678           }
679         }
680       }
681       noinsert2:;
682       low = i;
683     }
684     ailen[row] = nrow;
685   }
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
691 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
692 {
693   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
694   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695   int        m = A->m,*ip,N,*ailen = a->ilen;
696   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
697   MatScalar  *aa = a->a,*ap;
698 
699   PetscFunctionBegin;
700   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
701 
702   if (m) rmax = ailen[0];
703   for (i=1; i<mbs; i++) {
704     /* move each row back by the amount of empty slots (fshift) before it*/
705     fshift += imax[i-1] - ailen[i-1];
706     rmax   = PetscMax(rmax,ailen[i]);
707     if (fshift) {
708       ip = aj + ai[i]; ap = aa + bs2*ai[i];
709       N = ailen[i];
710       for (j=0; j<N; j++) {
711         ip[j-fshift] = ip[j];
712         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
713       }
714     }
715     ai[i] = ai[i-1] + ailen[i-1];
716   }
717   if (mbs) {
718     fshift += imax[mbs-1] - ailen[mbs-1];
719     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
720   }
721   /* reset ilen and imax for each row */
722   for (i=0; i<mbs; i++) {
723     ailen[i] = imax[i] = ai[i+1] - ai[i];
724   }
725   a->nz = ai[mbs];
726 
727   /* diagonals may have moved, reset it */
728   if (a->diag) {
729     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
730   }
731   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
732            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
734            a->reallocs);
735   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
736   a->reallocs          = 0;
737   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
738 
739   PetscFunctionReturn(0);
740 }
741 
742 /*
743    This function returns an array of flags which indicate the locations of contiguous
744    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
745    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
746    Assume: sizes should be long enough to hold all the values.
747 */
748 #undef __FUNCT__
749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
750 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
751 {
752   int        i,j,k,row;
753   PetscTruth flg;
754 
755   PetscFunctionBegin;
756   for (i=0,j=0; i<n; j++) {
757     row = idx[i];
758     if (row%bs!=0) { /* Not the begining of a block */
759       sizes[j] = 1;
760       i++;
761     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
762       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
763       i++;
764     } else { /* Begining of the block, so check if the complete block exists */
765       flg = PETSC_TRUE;
766       for (k=1; k<bs; k++) {
767         if (row+k != idx[i+k]) { /* break in the block */
768           flg = PETSC_FALSE;
769           break;
770         }
771       }
772       if (flg == PETSC_TRUE) { /* No break in the bs */
773         sizes[j] = bs;
774         i+= bs;
775       } else {
776         sizes[j] = 1;
777         i++;
778       }
779     }
780   }
781   *bs_max = j;
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
787 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
788 {
789   PetscFunctionBegin;
790   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
791 }
792 
793 /* Only add/insert a(i,j) with i<=j (blocks).
794    Any a(i,j) with i>j input by user is ingored.
795 */
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
799 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
800 {
801   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
802   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
803   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
804   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
805   int         ridx,cidx,bs2=a->bs2,ierr;
806   MatScalar   *ap,value,*aa=a->a,*bap;
807 
808   PetscFunctionBegin;
809 
810   for (k=0; k<m; k++) { /* loop over added rows */
811     row  = im[k];       /* row number */
812     brow = row/bs;      /* block row number */
813     if (row < 0) continue;
814 #if defined(PETSC_USE_BOPT_g)
815     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
816 #endif
817     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
818     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
819     rmax = imax[brow];  /* maximum space allocated for this row */
820     nrow = ailen[brow]; /* actual length of this row */
821     low  = 0;
822 
823     for (l=0; l<n; l++) { /* loop over added columns */
824       if (in[l] < 0) continue;
825 #if defined(PETSC_USE_BOPT_g)
826       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
827 #endif
828       col = in[l];
829       bcol = col/bs;              /* block col number */
830 
831       if (brow <= bcol){
832         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
833         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
834           /* element value a(k,l) */
835           if (roworiented) {
836             value = v[l + k*n];
837           } else {
838             value = v[k + l*m];
839           }
840 
841           /* move pointer bap to a(k,l) quickly and add/insert value */
842           if (!sorted) low = 0; high = nrow;
843           while (high-low > 7) {
844             t = (low+high)/2;
845             if (rp[t] > bcol) high = t;
846             else              low  = t;
847           }
848           for (i=low; i<high; i++) {
849             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
850             if (rp[i] > bcol) break;
851             if (rp[i] == bcol) {
852               bap  = ap +  bs2*i + bs*cidx + ridx;
853               if (is == ADD_VALUES) *bap += value;
854               else                  *bap  = value;
855               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
856               if (brow == bcol && ridx < cidx){
857                 bap  = ap +  bs2*i + bs*ridx + cidx;
858                 if (is == ADD_VALUES) *bap += value;
859                 else                  *bap  = value;
860               }
861               goto noinsert1;
862             }
863           }
864 
865       if (nonew == 1) goto noinsert1;
866       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
867       if (nrow >= rmax) {
868         /* there is no extra room in row, therefore enlarge */
869         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
870         MatScalar *new_a;
871 
872         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873 
874         /* Malloc new storage space */
875         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
876         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
877         new_j = (int*)(new_a + bs2*new_nz);
878         new_i = new_j + new_nz;
879 
880         /* copy over old data into new slots */
881         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
882         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
883         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
884         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
885         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
888         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
889         /* free up old matrix storage */
890         ierr = PetscFree(a->a);CHKERRQ(ierr);
891         if (!a->singlemalloc) {
892           ierr = PetscFree(a->i);CHKERRQ(ierr);
893           ierr = PetscFree(a->j);CHKERRQ(ierr);
894         }
895         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
896         a->singlemalloc = PETSC_TRUE;
897 
898         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
899         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
900         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
901         a->maxnz += bs2*CHUNKSIZE;
902         a->reallocs++;
903         a->nz++;
904       }
905 
906       N = nrow++ - 1;
907       /* shift up all the later entries in this row */
908       for (ii=N; ii>=i; ii--) {
909         rp[ii+1] = rp[ii];
910         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
911       }
912       if (N>=i) {
913         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
914       }
915       rp[i]                      = bcol;
916       ap[bs2*i + bs*cidx + ridx] = value;
917       noinsert1:;
918       low = i;
919       /* } */
920         }
921       } /* end of if .. if..  */
922     }                     /* end of loop over added columns */
923     ailen[brow] = nrow;
924   }                       /* end of loop over added rows */
925 
926   PetscFunctionReturn(0);
927 }
928 
929 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
930 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
931 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
932 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
933 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
934 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
935 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
936 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
937 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
938 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
939 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
940 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
941 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
942 extern int MatZeroEntries_SeqSBAIJ(Mat);
943 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
944 
945 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
951 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
952 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
953 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
954 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
955 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
956 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
957 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
958 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
959 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
960 
961 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
962 
963 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
964 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
965 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
966 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
967 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
968 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
969 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
970 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
971 
972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
979 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
980 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
981 
982 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
983 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
984 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
985 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
986 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
987 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
988 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
989 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
990 
991 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
992 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
993 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
994 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
995 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
996 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
997 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
998 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
999 
1000 #ifdef HAVE_MatICCFactor
1001 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1004 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1005 {
1006   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1007   Mat         outA;
1008   int         ierr;
1009   PetscTruth  row_identity,col_identity;
1010 
1011   PetscFunctionBegin;
1012   /*
1013   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1014   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1015   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1016   if (!row_identity || !col_identity) {
1017     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1018   }
1019   */
1020 
1021   outA          = inA;
1022   inA->factor   = FACTOR_CHOLESKY;
1023 
1024   if (!a->diag) {
1025     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1026   }
1027   /*
1028       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1029       for ILU(0) factorization with natural ordering
1030   */
1031   switch (a->bs) {
1032   case 1:
1033     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1034     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1035     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1036     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1037   case 2:
1038     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1039     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1040     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1041     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1042     break;
1043   case 3:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1048     break;
1049   case 4:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1054     break;
1055   case 5:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1060     break;
1061   case 6:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1066     break;
1067   case 7:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1069     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1070     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1072     break;
1073   default:
1074     a->row        = row;
1075     a->icol       = col;
1076     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1077     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1078 
1079     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1080     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1081     PetscLogObjectParent(inA,a->icol);
1082 
1083     if (!a->solve_work) {
1084       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1085       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1086     }
1087   }
1088 
1089   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1090 
1091   PetscFunctionReturn(0);
1092 }
1093 #endif
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1097 int MatPrintHelp_SeqSBAIJ(Mat A)
1098 {
1099   static PetscTruth called = PETSC_FALSE;
1100   MPI_Comm          comm = A->comm;
1101   int               ierr;
1102 
1103   PetscFunctionBegin;
1104   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1105   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1106   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 EXTERN_C_BEGIN
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1113 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1114 {
1115   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1116   int         i,nz,n;
1117 
1118   PetscFunctionBegin;
1119   nz = baij->maxnz;
1120   n  = mat->n;
1121   for (i=0; i<nz; i++) {
1122     baij->j[i] = indices[i];
1123   }
1124   baij->nz = nz;
1125   for (i=0; i<n; i++) {
1126     baij->ilen[i] = baij->imax[i];
1127   }
1128 
1129   PetscFunctionReturn(0);
1130 }
1131 EXTERN_C_END
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1135 /*@
1136     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1137        in the matrix.
1138 
1139   Input Parameters:
1140 +  mat     - the SeqSBAIJ matrix
1141 -  indices - the column indices
1142 
1143   Level: advanced
1144 
1145   Notes:
1146     This can be called if you have precomputed the nonzero structure of the
1147   matrix and want to provide it to the matrix object to improve the performance
1148   of the MatSetValues() operation.
1149 
1150     You MUST have set the correct numbers of nonzeros per row in the call to
1151   MatCreateSeqSBAIJ().
1152 
1153     MUST be called before any calls to MatSetValues()
1154 
1155 .seealso: MatCreateSeqSBAIJ
1156 @*/
1157 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1158 {
1159   int ierr,(*f)(Mat,int *);
1160 
1161   PetscFunctionBegin;
1162   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1163   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1164   if (f) {
1165     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1166   } else {
1167     SETERRQ(1,"Wrong type of matrix to set column indices");
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1174 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1175 {
1176   int        ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1185 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1186 {
1187   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1188   PetscFunctionBegin;
1189   *array = a->a;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1195 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1196 {
1197   PetscFunctionBegin;
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #include "petscblaslapack.h"
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1204 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1205 {
1206   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1207   int          ierr,one=1,i,bs=y->bs,bs2,j;
1208 
1209   PetscFunctionBegin;
1210   if (str == SAME_NONZERO_PATTERN) {
1211     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1212   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1213     if (y->xtoy && y->XtoY != X) {
1214       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1215       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1216     }
1217     if (!y->xtoy) { /* get xtoy */
1218       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1219       y->XtoY = X;
1220     }
1221     bs2 = bs*bs;
1222     for (i=0; i<x->nz; i++) {
1223       j = 0;
1224       while (j < bs2){
1225         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1226         j++;
1227       }
1228     }
1229     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1230   } else {
1231     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1238 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1239 {
1240   PetscFunctionBegin;
1241   *flg = PETSC_TRUE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1247 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1248 {
1249   PetscFunctionBegin;
1250   *flg = PETSC_TRUE;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1256 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1257 {
1258   PetscFunctionBegin;
1259   *flg = PETSC_FALSE;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /* -------------------------------------------------------------------*/
1264 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1265        MatGetRow_SeqSBAIJ,
1266        MatRestoreRow_SeqSBAIJ,
1267        MatMult_SeqSBAIJ_N,
1268 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1269        MatMultTranspose_SeqSBAIJ,
1270        MatMultTransposeAdd_SeqSBAIJ,
1271        MatSolve_SeqSBAIJ_N,
1272        0,
1273        0,
1274 /*10*/ 0,
1275        0,
1276        MatCholeskyFactor_SeqSBAIJ,
1277        MatRelax_SeqSBAIJ,
1278        MatTranspose_SeqSBAIJ,
1279 /*15*/ MatGetInfo_SeqSBAIJ,
1280        MatEqual_SeqSBAIJ,
1281        MatGetDiagonal_SeqSBAIJ,
1282        MatDiagonalScale_SeqSBAIJ,
1283        MatNorm_SeqSBAIJ,
1284 /*20*/ 0,
1285        MatAssemblyEnd_SeqSBAIJ,
1286        0,
1287        MatSetOption_SeqSBAIJ,
1288        MatZeroEntries_SeqSBAIJ,
1289 /*25*/ MatZeroRows_SeqSBAIJ,
1290        0,
1291        0,
1292        MatCholeskyFactorSymbolic_SeqSBAIJ,
1293        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1294 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1295        0,
1296        MatICCFactorSymbolic_SeqSBAIJ,
1297        MatGetArray_SeqSBAIJ,
1298        MatRestoreArray_SeqSBAIJ,
1299 /*35*/ MatDuplicate_SeqSBAIJ,
1300        0,
1301        0,
1302        0,
1303        0,
1304 /*40*/ MatAXPY_SeqSBAIJ,
1305        MatGetSubMatrices_SeqSBAIJ,
1306        MatIncreaseOverlap_SeqSBAIJ,
1307        MatGetValues_SeqSBAIJ,
1308        0,
1309 /*45*/ MatPrintHelp_SeqSBAIJ,
1310        MatScale_SeqSBAIJ,
1311        0,
1312        0,
1313        0,
1314 /*50*/ MatGetBlockSize_SeqSBAIJ,
1315        MatGetRowIJ_SeqSBAIJ,
1316        MatRestoreRowIJ_SeqSBAIJ,
1317        0,
1318        0,
1319 /*55*/ 0,
1320        0,
1321        0,
1322        0,
1323        MatSetValuesBlocked_SeqSBAIJ,
1324 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1325        0,
1326        0,
1327        MatGetPetscMaps_Petsc,
1328        0,
1329 /*65*/ 0,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*70*/ MatGetRowMax_SeqSBAIJ,
1335        0,
1336        0,
1337        0,
1338        0,
1339 /*75*/ 0,
1340        0,
1341        0,
1342        0,
1343        0,
1344 /*80*/ 0,
1345        0,
1346        0,
1347 #if !defined(PETSC_USE_COMPLEX)
1348        MatGetInertia_SeqSBAIJ,
1349 #else
1350        0,
1351 #endif
1352 /*85*/ MatLoad_SeqSBAIJ,
1353        MatIsSymmetric_SeqSBAIJ,
1354        MatIsStructurallySymmetric_SeqSBAIJ,
1355        MatIsHermitian_SeqSBAIJ
1356 };
1357 
1358 EXTERN_C_BEGIN
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1361 int MatStoreValues_SeqSBAIJ(Mat mat)
1362 {
1363   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1364   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1365   int         ierr;
1366 
1367   PetscFunctionBegin;
1368   if (aij->nonew != 1) {
1369     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1370   }
1371 
1372   /* allocate space for values if not already there */
1373   if (!aij->saved_values) {
1374     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1375   }
1376 
1377   /* copy values over */
1378   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 EXTERN_C_END
1382 
1383 EXTERN_C_BEGIN
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1386 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1387 {
1388   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1389   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1390 
1391   PetscFunctionBegin;
1392   if (aij->nonew != 1) {
1393     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1394   }
1395   if (!aij->saved_values) {
1396     SETERRQ(1,"Must call MatStoreValues(A);first");
1397   }
1398 
1399   /* copy values over */
1400   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 EXTERN_C_END
1404 
1405 EXTERN_C_BEGIN
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1408 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1409 {
1410   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1411   int          i,len,ierr,mbs,bs2;
1412   PetscTruth   flg;
1413 
1414   PetscFunctionBegin;
1415   B->preallocated = PETSC_TRUE;
1416   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1417   mbs  = B->m/bs;
1418   bs2  = bs*bs;
1419 
1420   if (mbs*bs != B->m) {
1421     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1422   }
1423 
1424   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1425   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1426   if (nnz) {
1427     for (i=0; i<mbs; i++) {
1428       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1429       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1430     }
1431   }
1432 
1433   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1434   if (!flg) {
1435     switch (bs) {
1436     case 1:
1437       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1438       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1439       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1440       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1441       B->ops->mult            = MatMult_SeqSBAIJ_1;
1442       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1443       break;
1444     case 2:
1445       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1446       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1447       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1448       B->ops->mult            = MatMult_SeqSBAIJ_2;
1449       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1450       break;
1451     case 3:
1452       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1453       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1454       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1455       B->ops->mult            = MatMult_SeqSBAIJ_3;
1456       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1457       break;
1458     case 4:
1459       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1460       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1461       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1462       B->ops->mult            = MatMult_SeqSBAIJ_4;
1463       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1464       break;
1465     case 5:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1468       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1469       B->ops->mult            = MatMult_SeqSBAIJ_5;
1470       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1471       break;
1472     case 6:
1473       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1474       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1475       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1476       B->ops->mult            = MatMult_SeqSBAIJ_6;
1477       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1478       break;
1479     case 7:
1480       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1481       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1482       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1483       B->ops->mult            = MatMult_SeqSBAIJ_7;
1484       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1485       break;
1486     }
1487   }
1488 
1489   b->mbs = mbs;
1490   b->nbs = mbs;
1491   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1492   if (!nnz) {
1493     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1494     else if (nz <= 0)        nz = 1;
1495     for (i=0; i<mbs; i++) {
1496       b->imax[i] = nz;
1497     }
1498     nz = nz*mbs; /* total nz */
1499   } else {
1500     nz = 0;
1501     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1502   }
1503   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1504 
1505   /* allocate the matrix space */
1506   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1507   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1508   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509   b->j = (int*)(b->a + nz*bs2);
1510   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1511   b->i = b->j + nz;
1512   b->singlemalloc = PETSC_TRUE;
1513 
1514   /* pointer to beginning of each row */
1515   b->i[0] = 0;
1516   for (i=1; i<mbs+1; i++) {
1517     b->i[i] = b->i[i-1] + b->imax[i-1];
1518   }
1519 
1520   /* b->ilen will count nonzeros in each block row so far. */
1521   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1522   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1523   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1524 
1525   b->bs               = bs;
1526   b->bs2              = bs2;
1527   b->nz             = 0;
1528   b->maxnz          = nz*bs2;
1529 
1530   b->inew             = 0;
1531   b->jnew             = 0;
1532   b->anew             = 0;
1533   b->a2anew           = 0;
1534   b->permute          = PETSC_FALSE;
1535   PetscFunctionReturn(0);
1536 }
1537 EXTERN_C_END
1538 
1539 EXTERN_C_BEGIN
1540 EXTERN int MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1541 EXTERN_C_END
1542 
1543 /*MC
1544    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1545    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1546 
1547    Options Database Keys:
1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1549 
1550   Level: beginner
1551 
1552 .seealso: MatCreateSeqSBAIJ
1553 M*/
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1558 int MatCreate_SeqSBAIJ(Mat B)
1559 {
1560   Mat_SeqSBAIJ *b;
1561   int          ierr,size;
1562 
1563   PetscFunctionBegin;
1564   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1565   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1566   B->m = B->M = PetscMax(B->m,B->M);
1567   B->n = B->N = PetscMax(B->n,B->N);
1568 
1569   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1570   B->data = (void*)b;
1571   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1572   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1573   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1574   B->ops->view        = MatView_SeqSBAIJ;
1575   B->factor           = 0;
1576   B->lupivotthreshold = 1.0;
1577   B->mapping          = 0;
1578   b->row              = 0;
1579   b->icol             = 0;
1580   b->reallocs         = 0;
1581   b->saved_values     = 0;
1582 
1583   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1584   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1585 
1586   b->sorted           = PETSC_FALSE;
1587   b->roworiented      = PETSC_TRUE;
1588   b->nonew            = 0;
1589   b->diag             = 0;
1590   b->solve_work       = 0;
1591   b->mult_work        = 0;
1592   B->spptr            = 0;
1593   b->keepzeroedrows   = PETSC_FALSE;
1594   b->xtoy             = 0;
1595   b->XtoY             = 0;
1596 
1597   b->inew             = 0;
1598   b->jnew             = 0;
1599   b->anew             = 0;
1600   b->a2anew           = 0;
1601   b->permute          = PETSC_FALSE;
1602 
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604                                      "MatStoreValues_SeqSBAIJ",
1605                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607                                      "MatRetrieveValues_SeqSBAIJ",
1608                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1610                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1611                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1613                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1614                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1616                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1617                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 EXTERN_C_END
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1624 /*@C
1625    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1626    compressed row) format.  For good matrix assembly performance the
1627    user should preallocate the matrix storage by setting the parameter nz
1628    (or the array nnz).  By setting these parameters accurately, performance
1629    during matrix assembly can be increased by more than a factor of 50.
1630 
1631    Collective on Mat
1632 
1633    Input Parameters:
1634 +  A - the symmetric matrix
1635 .  bs - size of block
1636 .  nz - number of block nonzeros per block row (same for all rows)
1637 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1638          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1639 
1640    Options Database Keys:
1641 .   -mat_no_unroll - uses code that does not unroll the loops in the
1642                      block calculations (much slower)
1643 .    -mat_block_size - size of the blocks to use
1644 
1645    Level: intermediate
1646 
1647    Notes:
1648    Specify the preallocated storage with either nz or nnz (not both).
1649    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1650    allocation.  For additional details, see the users manual chapter on
1651    matrices.
1652 
1653 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1654 @*/
1655 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1656   int ierr,(*f)(Mat,int,int,const int[]);
1657 
1658   PetscFunctionBegin;
1659   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1660   if (f) {
1661     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatCreateSeqSBAIJ"
1668 /*@C
1669    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1670    compressed row) format.  For good matrix assembly performance the
1671    user should preallocate the matrix storage by setting the parameter nz
1672    (or the array nnz).  By setting these parameters accurately, performance
1673    during matrix assembly can be increased by more than a factor of 50.
1674 
1675    Collective on MPI_Comm
1676 
1677    Input Parameters:
1678 +  comm - MPI communicator, set to PETSC_COMM_SELF
1679 .  bs - size of block
1680 .  m - number of rows, or number of columns
1681 .  nz - number of block nonzeros per block row (same for all rows)
1682 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1683          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1684 
1685    Output Parameter:
1686 .  A - the symmetric matrix
1687 
1688    Options Database Keys:
1689 .   -mat_no_unroll - uses code that does not unroll the loops in the
1690                      block calculations (much slower)
1691 .    -mat_block_size - size of the blocks to use
1692 
1693    Level: intermediate
1694 
1695    Notes:
1696 
1697    Specify the preallocated storage with either nz or nnz (not both).
1698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1699    allocation.  For additional details, see the users manual chapter on
1700    matrices.
1701 
1702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1703 @*/
1704 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1705 {
1706   int ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1710   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1711   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1717 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1718 {
1719   Mat         C;
1720   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1721   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1722 
1723   PetscFunctionBegin;
1724   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1725 
1726   *B = 0;
1727   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1728   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1729   c    = (Mat_SeqSBAIJ*)C->data;
1730 
1731   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1732   C->preallocated   = PETSC_TRUE;
1733   C->factor         = A->factor;
1734   c->row            = 0;
1735   c->icol           = 0;
1736   c->saved_values   = 0;
1737   c->keepzeroedrows = a->keepzeroedrows;
1738   C->assembled      = PETSC_TRUE;
1739 
1740   c->bs         = a->bs;
1741   c->bs2        = a->bs2;
1742   c->mbs        = a->mbs;
1743   c->nbs        = a->nbs;
1744 
1745   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1746   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1747   for (i=0; i<mbs; i++) {
1748     c->imax[i] = a->imax[i];
1749     c->ilen[i] = a->ilen[i];
1750   }
1751 
1752   /* allocate the matrix space */
1753   c->singlemalloc = PETSC_TRUE;
1754   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1755   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1756   c->j = (int*)(c->a + nz*bs2);
1757   c->i = c->j + nz;
1758   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1759   if (mbs > 0) {
1760     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1761     if (cpvalues == MAT_COPY_VALUES) {
1762       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1763     } else {
1764       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1765     }
1766   }
1767 
1768   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1769   c->sorted      = a->sorted;
1770   c->roworiented = a->roworiented;
1771   c->nonew       = a->nonew;
1772 
1773   if (a->diag) {
1774     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1775     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1776     for (i=0; i<mbs; i++) {
1777       c->diag[i] = a->diag[i];
1778     }
1779   } else c->diag        = 0;
1780   c->nz               = a->nz;
1781   c->maxnz            = a->maxnz;
1782   c->solve_work         = 0;
1783   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1784   c->mult_work          = 0;
1785   *B = C;
1786   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1792 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1793 {
1794   Mat_SeqSBAIJ *a;
1795   Mat          B;
1796   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1797   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1798   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1799   int          *masked,nmask,tmp,bs2,ishift;
1800   PetscScalar  *aa;
1801   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1802 
1803   PetscFunctionBegin;
1804   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1805   bs2  = bs*bs;
1806 
1807   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1808   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1809   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1810   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1811   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1812   M = header[1]; N = header[2]; nz = header[3];
1813 
1814   if (header[3] < 0) {
1815     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1816   }
1817 
1818   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1819 
1820   /*
1821      This code adds extra rows to make sure the number of rows is
1822     divisible by the blocksize
1823   */
1824   mbs        = M/bs;
1825   extra_rows = bs - M + bs*(mbs);
1826   if (extra_rows == bs) extra_rows = 0;
1827   else                  mbs++;
1828   if (extra_rows) {
1829     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1830   }
1831 
1832   /* read in row lengths */
1833   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1834   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1835   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1836 
1837   /* read in column indices */
1838   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1839   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1840   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1841 
1842   /* loop over row lengths determining block row lengths */
1843   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1844   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1845   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1846   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1847   masked   = mask + mbs;
1848   rowcount = 0; nzcount = 0;
1849   for (i=0; i<mbs; i++) {
1850     nmask = 0;
1851     for (j=0; j<bs; j++) {
1852       kmax = rowlengths[rowcount];
1853       for (k=0; k<kmax; k++) {
1854         tmp = jj[nzcount++]/bs;   /* block col. index */
1855         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1856       }
1857       rowcount++;
1858     }
1859     s_browlengths[i] += nmask;
1860 
1861     /* zero out the mask elements we set */
1862     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1863   }
1864 
1865   /* create our matrix */
1866   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1867   ierr = MatSetType(B,type);CHKERRQ(ierr);
1868   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1869   a = (Mat_SeqSBAIJ*)B->data;
1870 
1871   /* set matrix "i" values */
1872   a->i[0] = 0;
1873   for (i=1; i<= mbs; i++) {
1874     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1875     a->ilen[i-1] = s_browlengths[i-1];
1876   }
1877   a->nz = a->i[mbs];
1878 
1879   /* read in nonzero values */
1880   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1881   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1882   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1883 
1884   /* set "a" and "j" values into matrix */
1885   nzcount = 0; jcount = 0;
1886   for (i=0; i<mbs; i++) {
1887     nzcountb = nzcount;
1888     nmask    = 0;
1889     for (j=0; j<bs; j++) {
1890       kmax = rowlengths[i*bs+j];
1891       for (k=0; k<kmax; k++) {
1892         tmp = jj[nzcount++]/bs; /* block col. index */
1893         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1894       }
1895     }
1896     /* sort the masked values */
1897     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1898 
1899     /* set "j" values into matrix */
1900     maskcount = 1;
1901     for (j=0; j<nmask; j++) {
1902       a->j[jcount++]  = masked[j];
1903       mask[masked[j]] = maskcount++;
1904     }
1905 
1906     /* set "a" values into matrix */
1907     ishift = bs2*a->i[i];
1908     for (j=0; j<bs; j++) {
1909       kmax = rowlengths[i*bs+j];
1910       for (k=0; k<kmax; k++) {
1911         tmp       = jj[nzcountb]/bs ; /* block col. index */
1912         if (tmp >= i){
1913           block     = mask[tmp] - 1;
1914           point     = jj[nzcountb] - bs*tmp;
1915           idx       = ishift + bs2*block + j + bs*point;
1916           a->a[idx] = aa[nzcountb];
1917         }
1918         nzcountb++;
1919       }
1920     }
1921     /* zero out the mask elements we set */
1922     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1923   }
1924   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1925 
1926   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1927   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1928   ierr = PetscFree(aa);CHKERRQ(ierr);
1929   ierr = PetscFree(jj);CHKERRQ(ierr);
1930   ierr = PetscFree(mask);CHKERRQ(ierr);
1931 
1932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934   ierr = MatView_Private(B);CHKERRQ(ierr);
1935   *A = B;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1941 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1942 {
1943   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1944   MatScalar    *aa=a->a,*v,*v1;
1945   PetscScalar  *x,*b,*t,sum,d;
1946   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1947   int          nz,nz1,*vj,*vj1,i;
1948 
1949   PetscFunctionBegin;
1950   its = its*lits;
1951   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1952 
1953   if (bs > 1)
1954     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1955 
1956   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1957   if (xx != bb) {
1958     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1959   } else {
1960     b = x;
1961   }
1962 
1963   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1964 
1965   if (flag & SOR_ZERO_INITIAL_GUESS) {
1966     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1967       for (i=0; i<m; i++)
1968         t[i] = b[i];
1969 
1970       for (i=0; i<m; i++){
1971         d  = *(aa + ai[i]);  /* diag[i] */
1972         v  = aa + ai[i] + 1;
1973         vj = aj + ai[i] + 1;
1974         nz = ai[i+1] - ai[i] - 1;
1975         x[i] = omega*t[i]/d;
1976         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1977         PetscLogFlops(2*nz-1);
1978       }
1979     }
1980 
1981     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1982       for (i=0; i<m; i++)
1983         t[i] = b[i];
1984 
1985       for (i=0; i<m-1; i++){  /* update rhs */
1986         v  = aa + ai[i] + 1;
1987         vj = aj + ai[i] + 1;
1988         nz = ai[i+1] - ai[i] - 1;
1989         while (nz--) t[*vj++] -= x[i]*(*v++);
1990         PetscLogFlops(2*nz-1);
1991       }
1992       for (i=m-1; i>=0; i--){
1993         d  = *(aa + ai[i]);
1994         v  = aa + ai[i] + 1;
1995         vj = aj + ai[i] + 1;
1996         nz = ai[i+1] - ai[i] - 1;
1997         sum = t[i];
1998         while (nz--) sum -= x[*vj++]*(*v++);
1999         PetscLogFlops(2*nz-1);
2000         x[i] =   (1-omega)*x[i] + omega*sum/d;
2001       }
2002     }
2003     its--;
2004   }
2005 
2006   while (its--) {
2007     /*
2008        forward sweep:
2009        for i=0,...,m-1:
2010          sum[i] = (b[i] - U(i,:)x )/d[i];
2011          x[i]   = (1-omega)x[i] + omega*sum[i];
2012          b      = b - x[i]*U^T(i,:);
2013 
2014     */
2015     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2016       for (i=0; i<m; i++)
2017         t[i] = b[i];
2018 
2019       for (i=0; i<m; i++){
2020         d  = *(aa + ai[i]);  /* diag[i] */
2021         v  = aa + ai[i] + 1; v1=v;
2022         vj = aj + ai[i] + 1; vj1=vj;
2023         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2024         sum = t[i];
2025         while (nz1--) sum -= (*v1++)*x[*vj1++];
2026         x[i] = (1-omega)*x[i] + omega*sum/d;
2027         while (nz--) t[*vj++] -= x[i]*(*v++);
2028         PetscLogFlops(4*nz-2);
2029       }
2030     }
2031 
2032   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2033       /*
2034        backward sweep:
2035        b = b - x[i]*U^T(i,:), i=0,...,n-2
2036        for i=m-1,...,0:
2037          sum[i] = (b[i] - U(i,:)x )/d[i];
2038          x[i]   = (1-omega)x[i] + omega*sum[i];
2039       */
2040       for (i=0; i<m; i++)
2041         t[i] = b[i];
2042 
2043       for (i=0; i<m-1; i++){  /* update rhs */
2044         v  = aa + ai[i] + 1;
2045         vj = aj + ai[i] + 1;
2046         nz = ai[i+1] - ai[i] - 1;
2047         while (nz--) t[*vj++] -= x[i]*(*v++);
2048         PetscLogFlops(2*nz-1);
2049       }
2050       for (i=m-1; i>=0; i--){
2051         d  = *(aa + ai[i]);
2052         v  = aa + ai[i] + 1;
2053         vj = aj + ai[i] + 1;
2054         nz = ai[i+1] - ai[i] - 1;
2055         sum = t[i];
2056         while (nz--) sum -= x[*vj++]*(*v++);
2057         PetscLogFlops(2*nz-1);
2058         x[i] =   (1-omega)*x[i] + omega*sum/d;
2059       }
2060     }
2061   }
2062 
2063   ierr = PetscFree(t); CHKERRQ(ierr);
2064   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2065   if (bb != xx) {
2066     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2067   }
2068 
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 
2073 
2074 
2075 
2076 
2077