xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 63bb2aa60f4717cbfb86ebed81540aa885ea575c)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56 
57   PetscFunctionBegin;
58 
59   if (ia) {
60     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
61   }
62 
63 #ifdef NEW
64   int         ierr;
65 
66   if (!ia) PetscFunctionReturn(0);
67 
68 /*
69   if (symmetric) {
70     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
71   } else if (oshift == 1) {
72     int nz = a->i[n];
73     for (i=0; i<nz; i++) a->j[i]++;
74     for (i=0; i<n+1; i++) a->i[i]++;
75     *ia = a->i; *ja = a->j;
76   } else {
77 */
78     *ia = a->i; *ja = a->j;
79     /* } */
80 #endif
81   *nn = a->mbs;
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
88 {
89   PetscFunctionBegin;
90   if (!ia) PetscFunctionReturn(0);
91   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
92   /* PetscFunctionReturn(0); */
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
98 {
99   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
100 
101   PetscFunctionBegin;
102   *bs = sbaij->bs;
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 int MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
111   int         ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
116 #endif
117   ierr = PetscFree(a->a);CHKERRQ(ierr);
118   if (!a->singlemalloc) {
119     ierr = PetscFree(a->i);CHKERRQ(ierr);
120     ierr = PetscFree(a->j);CHKERRQ(ierr);
121   }
122   if (a->row) {
123     ierr = ISDestroy(a->row);CHKERRQ(ierr);
124   }
125   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
126   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
127   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
128   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
129   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
130   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
131   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
132 
133   if (a->inew){
134     ierr = PetscFree(a->inew);CHKERRQ(ierr);
135     a->inew = 0;
136   }
137   ierr = PetscFree(a);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
144 {
145   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
146 
147   PetscFunctionBegin;
148   switch (op) {
149   case MAT_ROW_ORIENTED:
150     a->roworiented    = PETSC_TRUE;
151     break;
152   case MAT_COLUMN_ORIENTED:
153     a->roworiented    = PETSC_FALSE;
154     break;
155   case MAT_COLUMNS_SORTED:
156     a->sorted         = PETSC_TRUE;
157     break;
158   case MAT_COLUMNS_UNSORTED:
159     a->sorted         = PETSC_FALSE;
160     break;
161   case MAT_KEEP_ZEROED_ROWS:
162     a->keepzeroedrows = PETSC_TRUE;
163     break;
164   case MAT_NO_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 1;
166     break;
167   case MAT_NEW_NONZERO_LOCATION_ERR:
168     a->nonew          = -1;
169     break;
170   case MAT_NEW_NONZERO_ALLOCATION_ERR:
171     a->nonew          = -2;
172     break;
173   case MAT_YES_NEW_NONZERO_LOCATIONS:
174     a->nonew          = 0;
175     break;
176   case MAT_ROWS_SORTED:
177   case MAT_ROWS_UNSORTED:
178   case MAT_YES_NEW_DIAGONALS:
179   case MAT_IGNORE_OFF_PROC_ENTRIES:
180   case MAT_USE_HASH_TABLE:
181   case MAT_USE_SINGLE_PRECISION_SOLVES:
182     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
183     break;
184   case MAT_NO_NEW_DIAGONALS:
185     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
197   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
198   MatScalar    *aa,*aa_i;
199   PetscScalar  *v_i;
200 
201   PetscFunctionBegin;
202   bs  = a->bs;
203   ai  = a->i;
204   aj  = a->j;
205   aa  = a->a;
206   bs2 = a->bs2;
207 
208   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
209 
210   bn  = row/bs;   /* Block number */
211   bp  = row % bs; /* Block position */
212   M   = ai[bn+1] - ai[bn];
213   *ncols = bs*M;
214 
215   if (v) {
216     *v = 0;
217     if (*ncols) {
218       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
219       for (i=0; i<M; i++) { /* for each block in the block row */
220         v_i  = *v + i*bs;
221         aa_i = aa + bs2*(ai[bn] + i);
222         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
223       }
224     }
225   }
226 
227   if (cols) {
228     *cols = 0;
229     if (*ncols) {
230       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
231       for (i=0; i<M; i++) { /* for each block in the block row */
232         cols_i = *cols + i*bs;
233         itmp  = bs*aj[ai[bn] + i];
234         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
235       }
236     }
237   }
238 
239   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
240   /* this segment is currently removed, so only entries in the upper triangle are obtained */
241 #ifdef column_search
242   v_i    = *v    + M*bs;
243   cols_i = *cols + M*bs;
244   for (i=0; i<bn; i++){ /* for each block row */
245     M = ai[i+1] - ai[i];
246     for (j=0; j<M; j++){
247       itmp = aj[ai[i] + j];    /* block column value */
248       if (itmp == bn){
249         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
250         for (k=0; k<bs; k++) {
251           *cols_i++ = i*bs+k;
252           *v_i++    = aa_i[k];
253         }
254         *ncols += bs;
255         break;
256       }
257     }
258   }
259 #endif
260 
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
267 {
268   int ierr;
269 
270   PetscFunctionBegin;
271   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
272   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
279 {
280   int ierr;
281   PetscFunctionBegin;
282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
289 {
290   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
291   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
292   PetscScalar  *aa;
293   FILE         *file;
294 
295   PetscFunctionBegin;
296   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
297   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
298   col_lens[0] = MAT_FILE_COOKIE;
299 
300   col_lens[1] = A->m;
301   col_lens[2] = A->m;
302   col_lens[3] = a->s_nz*bs2;
303 
304   /* store lengths of each row and write (including header) to file */
305   count = 0;
306   for (i=0; i<a->mbs; i++) {
307     for (j=0; j<bs; j++) {
308       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
309     }
310   }
311   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
312   ierr = PetscFree(col_lens);CHKERRQ(ierr);
313 
314   /* store column indices (zero start index) */
315   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
316   count = 0;
317   for (i=0; i<a->mbs; i++) {
318     for (j=0; j<bs; j++) {
319       for (k=a->i[i]; k<a->i[i+1]; k++) {
320         for (l=0; l<bs; l++) {
321           jj[count++] = bs*a->j[k] + l;
322         }
323       }
324     }
325   }
326   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
327   ierr = PetscFree(jj);CHKERRQ(ierr);
328 
329   /* store nonzero values */
330   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
331   count = 0;
332   for (i=0; i<a->mbs; i++) {
333     for (j=0; j<bs; j++) {
334       for (k=a->i[i]; k<a->i[i+1]; k++) {
335         for (l=0; l<bs; l++) {
336           aa[count++] = a->a[bs2*k + l*bs + j];
337         }
338       }
339     }
340   }
341   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
342   ierr = PetscFree(aa);CHKERRQ(ierr);
343 
344   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
345   if (file) {
346     fprintf(file,"-matload_block_size %d\n",a->bs);
347   }
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
354 {
355   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
356   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
357   char              *name;
358   PetscViewerFormat format;
359 
360   PetscFunctionBegin;
361   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
362   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
363   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
364     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
365   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
366     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
367   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
368     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
369     for (i=0; i<a->mbs; i++) {
370       for (j=0; j<bs; j++) {
371         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
372         for (k=a->i[i]; k<a->i[i+1]; k++) {
373           for (l=0; l<bs; l++) {
374 #if defined(PETSC_USE_COMPLEX)
375             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
376               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
377                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
379               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
380                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
381             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
383             }
384 #else
385             if (a->a[bs2*k + l*bs + j] != 0.0) {
386               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
387             }
388 #endif
389           }
390         }
391         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
392       }
393     }
394     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
395   } else {
396     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
397     for (i=0; i<a->mbs; i++) {
398       for (j=0; j<bs; j++) {
399         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
400         for (k=a->i[i]; k<a->i[i+1]; k++) {
401           for (l=0; l<bs; l++) {
402 #if defined(PETSC_USE_COMPLEX)
403             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
404               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
405                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
407               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
408                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
409             } else {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
411             }
412 #else
413             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
414 #endif
415           }
416         }
417         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
418       }
419     }
420     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
421   }
422   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
429 {
430   Mat          A = (Mat) Aa;
431   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
432   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
433   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
434   MatScalar    *aa;
435   MPI_Comm     comm;
436   PetscViewer  viewer;
437 
438   PetscFunctionBegin;
439   /*
440       This is nasty. If this is called from an originally parallel matrix
441    then all processes call this,but only the first has the matrix so the
442    rest should return immediately.
443   */
444   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
446   if (rank) PetscFunctionReturn(0);
447 
448   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
449 
450   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
451   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
452 
453   /* loop over matrix elements drawing boxes */
454   color = PETSC_DRAW_BLUE;
455   for (i=0,row=0; i<mbs; i++,row+=bs) {
456     for (j=a->i[i]; j<a->i[i+1]; j++) {
457       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
458       x_l = a->j[j]*bs; x_r = x_l + 1.0;
459       aa = a->a + j*bs2;
460       for (k=0; k<bs; k++) {
461         for (l=0; l<bs; l++) {
462           if (PetscRealPart(*aa++) >=  0.) continue;
463           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
464         }
465       }
466     }
467   }
468   color = PETSC_DRAW_CYAN;
469   for (i=0,row=0; i<mbs; i++,row+=bs) {
470     for (j=a->i[i]; j<a->i[i+1]; j++) {
471       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
472       x_l = a->j[j]*bs; x_r = x_l + 1.0;
473       aa = a->a + j*bs2;
474       for (k=0; k<bs; k++) {
475         for (l=0; l<bs; l++) {
476           if (PetscRealPart(*aa++) != 0.) continue;
477           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
478         }
479       }
480     }
481   }
482 
483   color = PETSC_DRAW_RED;
484   for (i=0,row=0; i<mbs; i++,row+=bs) {
485     for (j=a->i[i]; j<a->i[i+1]; j++) {
486       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
487       x_l = a->j[j]*bs; x_r = x_l + 1.0;
488       aa = a->a + j*bs2;
489       for (k=0; k<bs; k++) {
490         for (l=0; l<bs; l++) {
491           if (PetscRealPart(*aa++) <= 0.) continue;
492           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
493         }
494       }
495     }
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
503 {
504   int          ierr;
505   PetscReal    xl,yl,xr,yr,w,h;
506   PetscDraw    draw;
507   PetscTruth   isnull;
508 
509   PetscFunctionBegin;
510 
511   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
512   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
513 
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
515   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
516   xr += w;    yr += h;  xl = -w;     yl = -h;
517   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
518   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
519   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatView_SeqSBAIJ"
525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
526 {
527   int        ierr;
528   PetscTruth issocket,isascii,isbinary,isdraw;
529 
530   PetscFunctionBegin;
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
534   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
535   if (issocket) {
536     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
537   } else if (isascii){
538     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
539   } else if (isbinary) {
540     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
541   } else if (isdraw) {
542     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
543   } else {
544     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
545   }
546   PetscFunctionReturn(0);
547 }
548 
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
553 {
554   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
555   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
556   int        *ai = a->i,*ailen = a->ilen;
557   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
558   MatScalar  *ap,*aa = a->a,zero = 0.0;
559 
560   PetscFunctionBegin;
561   for (k=0; k<m; k++) { /* loop over rows */
562     row  = im[k]; brow = row/bs;
563     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
564     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
565     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
566     nrow = ailen[brow];
567     for (l=0; l<n; l++) { /* loop over columns */
568       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
569       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
570       col  = in[l] ;
571       bcol = col/bs;
572       cidx = col%bs;
573       ridx = row%bs;
574       high = nrow;
575       low  = 0; /* assume unsorted */
576       while (high-low > 5) {
577         t = (low+high)/2;
578         if (rp[t] > bcol) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > bcol) break;
583         if (rp[i] == bcol) {
584           *v++ = ap[bs2*i+bs*cidx+ridx];
585           goto finished;
586         }
587       }
588       *v++ = zero;
589       finished:;
590     }
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
599 {
600   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
601   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
602   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
603   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
604   PetscTruth  roworiented=a->roworiented;
605   MatScalar   *value = v,*ap,*aa = a->a,*bap;
606 
607   PetscFunctionBegin;
608   if (roworiented) {
609     stepval = (n-1)*bs;
610   } else {
611     stepval = (m-1)*bs;
612   }
613   for (k=0; k<m; k++) { /* loop over added rows */
614     row  = im[k];
615     if (row < 0) continue;
616 #if defined(PETSC_USE_BOPT_g)
617     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
618 #endif
619     rp   = aj + ai[row];
620     ap   = aa + bs2*ai[row];
621     rmax = imax[row];
622     nrow = ailen[row];
623     low  = 0;
624     for (l=0; l<n; l++) { /* loop over added columns */
625       if (in[l] < 0) continue;
626 #if defined(PETSC_USE_BOPT_g)
627       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
628 #endif
629       col = in[l];
630       if (col < row) continue; /* ignore lower triangular block */
631       if (roworiented) {
632         value = v + k*(stepval+bs)*bs + l*bs;
633       } else {
634         value = v + l*(stepval+bs)*bs + k*bs;
635       }
636       if (!sorted) low = 0; high = nrow;
637       while (high-low > 7) {
638         t = (low+high)/2;
639         if (rp[t] > col) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > col) break;
644         if (rp[i] == col) {
645           bap  = ap +  bs2*i;
646           if (roworiented) {
647             if (is == ADD_VALUES) {
648               for (ii=0; ii<bs; ii++,value+=stepval) {
649                 for (jj=ii; jj<bs2; jj+=bs) {
650                   bap[jj] += *value++;
651                 }
652               }
653             } else {
654               for (ii=0; ii<bs; ii++,value+=stepval) {
655                 for (jj=ii; jj<bs2; jj+=bs) {
656                   bap[jj] = *value++;
657                 }
658               }
659             }
660           } else {
661             if (is == ADD_VALUES) {
662               for (ii=0; ii<bs; ii++,value+=stepval) {
663                 for (jj=0; jj<bs; jj++) {
664                   *bap++ += *value++;
665                 }
666               }
667             } else {
668               for (ii=0; ii<bs; ii++,value+=stepval) {
669                 for (jj=0; jj<bs; jj++) {
670                   *bap++  = *value++;
671                 }
672               }
673             }
674           }
675           goto noinsert2;
676         }
677       }
678       if (nonew == 1) goto noinsert2;
679       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
680       if (nrow >= rmax) {
681         /* there is no extra room in row, therefore enlarge */
682         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
683         MatScalar *new_a;
684 
685         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
686 
687         /* malloc new storage space */
688         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
689 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
690         new_j   = (int*)(new_a + bs2*new_nz);
691         new_i   = new_j + new_nz;
692 
693         /* copy over old data into new slots */
694         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
695         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
696         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
697         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
698         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
699         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
700         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
701         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
702         /* free up old matrix storage */
703         ierr = PetscFree(a->a);CHKERRQ(ierr);
704         if (!a->singlemalloc) {
705           ierr = PetscFree(a->i);CHKERRQ(ierr);
706           ierr = PetscFree(a->j);CHKERRQ(ierr);
707         }
708         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
709         a->singlemalloc = PETSC_TRUE;
710 
711         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
712         rmax = imax[row] = imax[row] + CHUNKSIZE;
713         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
714         a->s_maxnz += bs2*CHUNKSIZE;
715         a->reallocs++;
716         a->s_nz++;
717       }
718       N = nrow++ - 1;
719       /* shift up all the later entries in this row */
720       for (ii=N; ii>=i; ii--) {
721         rp[ii+1] = rp[ii];
722         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       if (N >= i) {
725         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726       }
727       rp[i] = col;
728       bap   = ap +  bs2*i;
729       if (roworiented) {
730         for (ii=0; ii<bs; ii++,value+=stepval) {
731           for (jj=ii; jj<bs2; jj+=bs) {
732             bap[jj] = *value++;
733           }
734         }
735       } else {
736         for (ii=0; ii<bs; ii++,value+=stepval) {
737           for (jj=0; jj<bs; jj++) {
738             *bap++  = *value++;
739           }
740         }
741       }
742       noinsert2:;
743       low = i;
744     }
745     ailen[row] = nrow;
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
753 {
754   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
755   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
756   int        m = A->m,*ip,N,*ailen = a->ilen;
757   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
758   MatScalar  *aa = a->a,*ap;
759 #if defined(PETSC_HAVE_SPOOLES)
760   PetscTruth   flag;
761 #endif
762 
763   PetscFunctionBegin;
764   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
765 
766   if (m) rmax = ailen[0];
767   for (i=1; i<mbs; i++) {
768     /* move each row back by the amount of empty slots (fshift) before it*/
769     fshift += imax[i-1] - ailen[i-1];
770     rmax   = PetscMax(rmax,ailen[i]);
771     if (fshift) {
772       ip = aj + ai[i]; ap = aa + bs2*ai[i];
773       N = ailen[i];
774       for (j=0; j<N; j++) {
775         ip[j-fshift] = ip[j];
776         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
777       }
778     }
779     ai[i] = ai[i-1] + ailen[i-1];
780   }
781   if (mbs) {
782     fshift += imax[mbs-1] - ailen[mbs-1];
783     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
784   }
785   /* reset ilen and imax for each row */
786   for (i=0; i<mbs; i++) {
787     ailen[i] = imax[i] = ai[i+1] - ai[i];
788   }
789   a->s_nz = ai[mbs];
790 
791   /* diagonals may have moved, reset it */
792   if (a->diag) {
793     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
794   }
795   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
796            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
797   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
798            a->reallocs);
799   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
800   a->reallocs          = 0;
801   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
802 
803 #if defined(PETSC_HAVE_SPOOLES)
804   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
805   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
806 #endif
807 
808   PetscFunctionReturn(0);
809 }
810 
811 /*
812    This function returns an array of flags which indicate the locations of contiguous
813    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
814    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
815    Assume: sizes should be long enough to hold all the values.
816 */
817 #undef __FUNCT__
818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
819 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
820 {
821   int        i,j,k,row;
822   PetscTruth flg;
823 
824   PetscFunctionBegin;
825   for (i=0,j=0; i<n; j++) {
826     row = idx[i];
827     if (row%bs!=0) { /* Not the begining of a block */
828       sizes[j] = 1;
829       i++;
830     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
831       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
832       i++;
833     } else { /* Begining of the block, so check if the complete block exists */
834       flg = PETSC_TRUE;
835       for (k=1; k<bs; k++) {
836         if (row+k != idx[i+k]) { /* break in the block */
837           flg = PETSC_FALSE;
838           break;
839         }
840       }
841       if (flg == PETSC_TRUE) { /* No break in the bs */
842         sizes[j] = bs;
843         i+= bs;
844       } else {
845         sizes[j] = 1;
846         i++;
847       }
848     }
849   }
850   *bs_max = j;
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
857 {
858   Mat_SeqSBAIJ  *sbaij=(Mat_SeqSBAIJ*)A->data;
859   int           ierr,i,j,k,count,is_n,*is_idx,*rows;
860   int           bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
861   PetscScalar   zero = 0.0;
862   MatScalar     *aa;
863 
864   PetscFunctionBegin;
865   /* Make a copy of the IS and  sort it */
866   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
867   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
868 
869   /* allocate memory for rows,sizes */
870   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
871   sizes = rows + is_n;
872 
873   /* initialize copy IS values to rows, and sort them */
874   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
875   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
876   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
877     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
878     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
879   } else {
880     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
881   }
882   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
883 
884   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
885     row   = rows[j];                  /* row to be zeroed */
886     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
887     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
888     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
889     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
890       if (diag) {
891         if (sbaij->ilen[row/bs] > 0) {
892           sbaij->ilen[row/bs] = 1;
893           sbaij->j[sbaij->i[row/bs]] = row/bs;
894           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
895         }
896         /* Now insert all the diagoanl values for this bs */
897         for (k=0; k<bs; k++) {
898           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
899         }
900       } else { /* (!diag) */
901         sbaij->ilen[row/bs] = 0;
902       } /* end (!diag) */
903     } else { /* (sizes[i] != bs), broken block */
904 #if defined (PETSC_USE_DEBUG)
905       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
906 #endif
907       for (k=0; k<count; k++) {
908         aa[0] = zero;
909         aa+=bs;
910       }
911       if (diag) {
912         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
913       }
914     }
915   }
916 
917   ierr = PetscFree(rows);CHKERRQ(ierr);
918   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 /* Only add/insert a(i,j) with i<=j (blocks).
924    Any a(i,j) with i>j input by user is ingored.
925 */
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
929 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
930 {
931   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
932   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
933   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
934   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
935   int         ridx,cidx,bs2=a->bs2,ierr;
936   MatScalar   *ap,value,*aa=a->a,*bap;
937 
938   PetscFunctionBegin;
939 
940   for (k=0; k<m; k++) { /* loop over added rows */
941     row  = im[k];       /* row number */
942     brow = row/bs;      /* block row number */
943     if (row < 0) continue;
944 #if defined(PETSC_USE_BOPT_g)
945     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
946 #endif
947     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
948     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
949     rmax = imax[brow];  /* maximum space allocated for this row */
950     nrow = ailen[brow]; /* actual length of this row */
951     low  = 0;
952 
953     for (l=0; l<n; l++) { /* loop over added columns */
954       if (in[l] < 0) continue;
955 #if defined(PETSC_USE_BOPT_g)
956       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
957 #endif
958       col = in[l];
959       bcol = col/bs;              /* block col number */
960 
961       if (brow <= bcol){
962         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
963         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
964           /* element value a(k,l) */
965           if (roworiented) {
966             value = v[l + k*n];
967           } else {
968             value = v[k + l*m];
969           }
970 
971           /* move pointer bap to a(k,l) quickly and add/insert value */
972           if (!sorted) low = 0; high = nrow;
973           while (high-low > 7) {
974             t = (low+high)/2;
975             if (rp[t] > bcol) high = t;
976             else              low  = t;
977           }
978           for (i=low; i<high; i++) {
979             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
980             if (rp[i] > bcol) break;
981             if (rp[i] == bcol) {
982               bap  = ap +  bs2*i + bs*cidx + ridx;
983               if (is == ADD_VALUES) *bap += value;
984               else                  *bap  = value;
985               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
986               if (brow == bcol && ridx < cidx){
987                 bap  = ap +  bs2*i + bs*ridx + cidx;
988                 if (is == ADD_VALUES) *bap += value;
989                 else                  *bap  = value;
990               }
991               goto noinsert1;
992             }
993           }
994 
995       if (nonew == 1) goto noinsert1;
996       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
997       if (nrow >= rmax) {
998         /* there is no extra room in row, therefore enlarge */
999         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1000         MatScalar *new_a;
1001 
1002         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1003 
1004         /* Malloc new storage space */
1005         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1006         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1007         new_j = (int*)(new_a + bs2*new_nz);
1008         new_i = new_j + new_nz;
1009 
1010         /* copy over old data into new slots */
1011         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1012         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1013         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1014         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1015         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1016         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1017         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1018         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1019         /* free up old matrix storage */
1020         ierr = PetscFree(a->a);CHKERRQ(ierr);
1021         if (!a->singlemalloc) {
1022           ierr = PetscFree(a->i);CHKERRQ(ierr);
1023           ierr = PetscFree(a->j);CHKERRQ(ierr);
1024         }
1025         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1026         a->singlemalloc = PETSC_TRUE;
1027 
1028         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1029         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1030         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1031         a->s_maxnz += bs2*CHUNKSIZE;
1032         a->reallocs++;
1033         a->s_nz++;
1034       }
1035 
1036       N = nrow++ - 1;
1037       /* shift up all the later entries in this row */
1038       for (ii=N; ii>=i; ii--) {
1039         rp[ii+1] = rp[ii];
1040         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1041       }
1042       if (N>=i) {
1043         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1044       }
1045       rp[i]                      = bcol;
1046       ap[bs2*i + bs*cidx + ridx] = value;
1047       noinsert1:;
1048       low = i;
1049       /* } */
1050         }
1051       } /* end of if .. if..  */
1052     }                     /* end of loop over added columns */
1053     ailen[brow] = nrow;
1054   }                       /* end of loop over added rows */
1055 
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
1060 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
1061 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
1062 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1063 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1064 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1065 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1066 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1067 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1068 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1069 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1070 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1071 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1072 extern int MatZeroEntries_SeqSBAIJ(Mat);
1073 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1074 
1075 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1076 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1077 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1078 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1079 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1080 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1081 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1082 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1083 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1084 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1085 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1086 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1087 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1088 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1089 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1090 
1091 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1092 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1093 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1094 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1095 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1096 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1097 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1098 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1099 
1100 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1101 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1102 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1103 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1104 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1105 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1106 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1107 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1108 
1109 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1110 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1111 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1112 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1113 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1114 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1115 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1116 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1117 
1118 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1119 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1120 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1121 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1122 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1123 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1124 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1125 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1126 
1127 #ifdef HAVE_MatICCFactor
1128 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1131 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
1132 {
1133   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1134   Mat         outA;
1135   int         ierr;
1136   PetscTruth  row_identity,col_identity;
1137 
1138   PetscFunctionBegin;
1139   /*
1140   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1141   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1142   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1143   if (!row_identity || !col_identity) {
1144     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1145   }
1146   */
1147 
1148   outA          = inA;
1149   inA->factor   = FACTOR_CHOLESKY;
1150 
1151   if (!a->diag) {
1152     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1153   }
1154   /*
1155       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1156       for ILU(0) factorization with natural ordering
1157   */
1158   switch (a->bs) {
1159   case 1:
1160     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1161     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1162     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1163   case 2:
1164     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1165     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1166     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1167     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1168     break;
1169   case 3:
1170     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1171     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1172     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1173     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1174     break;
1175   case 4:
1176     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1177     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1178     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1179     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1180     break;
1181   case 5:
1182     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1183     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1184     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1185     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1186     break;
1187   case 6:
1188     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1189     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1190     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1191     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1192     break;
1193   case 7:
1194     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1195     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1196     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1197     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1198     break;
1199   default:
1200     a->row        = row;
1201     a->icol       = col;
1202     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1203     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1204 
1205     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1206     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1207     PetscLogObjectParent(inA,a->icol);
1208 
1209     if (!a->solve_work) {
1210       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1211       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1212     }
1213   }
1214 
1215   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1216 
1217   PetscFunctionReturn(0);
1218 }
1219 #endif
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1223 int MatPrintHelp_SeqSBAIJ(Mat A)
1224 {
1225   static PetscTruth called = PETSC_FALSE;
1226   MPI_Comm          comm = A->comm;
1227   int               ierr;
1228 
1229   PetscFunctionBegin;
1230   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1231   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1232   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 EXTERN_C_BEGIN
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1239 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1240 {
1241   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1242   int         i,nz,n;
1243 
1244   PetscFunctionBegin;
1245   nz = baij->s_maxnz;
1246   n  = mat->n;
1247   for (i=0; i<nz; i++) {
1248     baij->j[i] = indices[i];
1249   }
1250   baij->s_nz = nz;
1251   for (i=0; i<n; i++) {
1252     baij->ilen[i] = baij->imax[i];
1253   }
1254 
1255   PetscFunctionReturn(0);
1256 }
1257 EXTERN_C_END
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1261 /*@
1262     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1263        in the matrix.
1264 
1265   Input Parameters:
1266 +  mat     - the SeqSBAIJ matrix
1267 -  indices - the column indices
1268 
1269   Level: advanced
1270 
1271   Notes:
1272     This can be called if you have precomputed the nonzero structure of the
1273   matrix and want to provide it to the matrix object to improve the performance
1274   of the MatSetValues() operation.
1275 
1276     You MUST have set the correct numbers of nonzeros per row in the call to
1277   MatCreateSeqSBAIJ().
1278 
1279     MUST be called before any calls to MatSetValues()
1280 
1281 .seealso: MatCreateSeqSBAIJ
1282 @*/
1283 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1284 {
1285   int ierr,(*f)(Mat,int *);
1286 
1287   PetscFunctionBegin;
1288   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1289   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1290   if (f) {
1291     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1292   } else {
1293     SETERRQ(1,"Wrong type of matrix to set column indices");
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1300 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1301 {
1302   int        ierr;
1303 
1304   PetscFunctionBegin;
1305   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #include "petscblaslapack.h"
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1312 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1313 {
1314   int          ierr,one=1;
1315   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1316 
1317   PetscFunctionBegin;
1318   if (str == SAME_NONZERO_PATTERN) {
1319     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1320   } else {
1321     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* -------------------------------------------------------------------*/
1327 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1328        MatGetRow_SeqSBAIJ,
1329        MatRestoreRow_SeqSBAIJ,
1330        MatMult_SeqSBAIJ_N,
1331        MatMultAdd_SeqSBAIJ_N,
1332        MatMultTranspose_SeqSBAIJ,
1333        MatMultTransposeAdd_SeqSBAIJ,
1334        MatSolve_SeqSBAIJ_N,
1335        0,
1336        0,
1337        0,
1338        0,
1339        MatCholeskyFactor_SeqSBAIJ,
1340        MatRelax_SeqSBAIJ,
1341        MatTranspose_SeqSBAIJ,
1342        MatGetInfo_SeqSBAIJ,
1343        MatEqual_SeqSBAIJ,
1344        MatGetDiagonal_SeqSBAIJ,
1345        MatDiagonalScale_SeqSBAIJ,
1346        MatNorm_SeqSBAIJ,
1347        0,
1348        MatAssemblyEnd_SeqSBAIJ,
1349        0,
1350        MatSetOption_SeqSBAIJ,
1351        MatZeroEntries_SeqSBAIJ,
1352        MatZeroRows_SeqSBAIJ,
1353        0,
1354        0,
1355        MatCholeskyFactorSymbolic_SeqSBAIJ,
1356        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1357        MatSetUpPreallocation_SeqSBAIJ,
1358        0,
1359        MatICCFactorSymbolic_SeqSBAIJ,
1360        0,
1361        0,
1362        MatDuplicate_SeqSBAIJ,
1363        0,
1364        0,
1365        0,
1366        0,
1367        MatAXPY_SeqSBAIJ,
1368        MatGetSubMatrices_SeqSBAIJ,
1369        MatIncreaseOverlap_SeqSBAIJ,
1370        MatGetValues_SeqSBAIJ,
1371        0,
1372        MatPrintHelp_SeqSBAIJ,
1373        MatScale_SeqSBAIJ,
1374        0,
1375        0,
1376        0,
1377        MatGetBlockSize_SeqSBAIJ,
1378        MatGetRowIJ_SeqSBAIJ,
1379        MatRestoreRowIJ_SeqSBAIJ,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        MatSetValuesBlocked_SeqSBAIJ,
1387        MatGetSubMatrix_SeqSBAIJ,
1388        0,
1389        0,
1390        MatGetPetscMaps_Petsc,
1391        0,
1392        0,
1393        0,
1394        0,
1395        0,
1396        0,
1397        MatGetRowMax_SeqSBAIJ};
1398 
1399 EXTERN_C_BEGIN
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1402 int MatStoreValues_SeqSBAIJ(Mat mat)
1403 {
1404   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1405   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1406   int         ierr;
1407 
1408   PetscFunctionBegin;
1409   if (aij->nonew != 1) {
1410     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1411   }
1412 
1413   /* allocate space for values if not already there */
1414   if (!aij->saved_values) {
1415     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1416   }
1417 
1418   /* copy values over */
1419   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 EXTERN_C_END
1423 
1424 EXTERN_C_BEGIN
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1427 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1428 {
1429   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1430   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1431 
1432   PetscFunctionBegin;
1433   if (aij->nonew != 1) {
1434     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1435   }
1436   if (!aij->saved_values) {
1437     SETERRQ(1,"Must call MatStoreValues(A);first");
1438   }
1439 
1440   /* copy values over */
1441   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 EXTERN_C_END
1445 
1446 EXTERN_C_BEGIN
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1449 int MatCreate_SeqSBAIJ(Mat B)
1450 {
1451   Mat_SeqSBAIJ *b;
1452   int          ierr,size;
1453 
1454   PetscFunctionBegin;
1455   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1456   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1457   B->m = B->M = PetscMax(B->m,B->M);
1458   B->n = B->N = PetscMax(B->n,B->N);
1459 
1460   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1461   B->data = (void*)b;
1462   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1463   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1464   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1465   B->ops->view        = MatView_SeqSBAIJ;
1466   B->factor           = 0;
1467   B->lupivotthreshold = 1.0;
1468   B->mapping          = 0;
1469   b->row              = 0;
1470   b->icol             = 0;
1471   b->reallocs         = 0;
1472   b->saved_values     = 0;
1473 
1474   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1475   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1476 
1477   b->sorted           = PETSC_FALSE;
1478   b->roworiented      = PETSC_TRUE;
1479   b->nonew            = 0;
1480   b->diag             = 0;
1481   b->solve_work       = 0;
1482   b->mult_work        = 0;
1483   B->spptr            = 0;
1484   b->keepzeroedrows   = PETSC_FALSE;
1485 
1486   b->inew             = 0;
1487   b->jnew             = 0;
1488   b->anew             = 0;
1489   b->a2anew           = 0;
1490   b->permute          = PETSC_FALSE;
1491 
1492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1493                                      "MatStoreValues_SeqSBAIJ",
1494                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1496                                      "MatRetrieveValues_SeqSBAIJ",
1497                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1499                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1500                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 EXTERN_C_END
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1507 /*@C
1508    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1509    compressed row) format.  For good matrix assembly performance the
1510    user should preallocate the matrix storage by setting the parameter nz
1511    (or the array nnz).  By setting these parameters accurately, performance
1512    during matrix assembly can be increased by more than a factor of 50.
1513 
1514    Collective on Mat
1515 
1516    Input Parameters:
1517 +  A - the symmetric matrix
1518 .  bs - size of block
1519 .  nz - number of block nonzeros per block row (same for all rows)
1520 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1521          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1522 
1523    Options Database Keys:
1524 .   -mat_no_unroll - uses code that does not unroll the loops in the
1525                      block calculations (much slower)
1526 .    -mat_block_size - size of the blocks to use
1527 
1528    Level: intermediate
1529 
1530    Notes:
1531    Specify the preallocated storage with either nz or nnz (not both).
1532    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1533    allocation.  For additional details, see the users manual chapter on
1534    matrices.
1535 
1536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1537 @*/
1538 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1539 {
1540   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1541   int          i,len,ierr,mbs,bs2;
1542   PetscTruth   flg;
1543   int          s_nz;
1544 
1545   PetscFunctionBegin;
1546   B->preallocated = PETSC_TRUE;
1547   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1548   mbs  = B->m/bs;
1549   bs2  = bs*bs;
1550 
1551   if (mbs*bs != B->m) {
1552     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1553   }
1554 
1555   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1556   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1557   if (nnz) {
1558     for (i=0; i<mbs; i++) {
1559       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1560       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1561     }
1562   }
1563 
1564   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1565   if (!flg) {
1566     switch (bs) {
1567     case 1:
1568       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1569       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1570       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1571       B->ops->mult            = MatMult_SeqSBAIJ_1;
1572       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1573       break;
1574     case 2:
1575       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1576       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1577       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1578       B->ops->mult            = MatMult_SeqSBAIJ_2;
1579       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1580       break;
1581     case 3:
1582       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1583       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1584       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1585       B->ops->mult            = MatMult_SeqSBAIJ_3;
1586       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1587       break;
1588     case 4:
1589       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1590       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1591       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1592       B->ops->mult            = MatMult_SeqSBAIJ_4;
1593       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1594       break;
1595     case 5:
1596       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1597       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1598       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1599       B->ops->mult            = MatMult_SeqSBAIJ_5;
1600       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1601       break;
1602     case 6:
1603       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1604       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1605       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1606       B->ops->mult            = MatMult_SeqSBAIJ_6;
1607       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1608       break;
1609     case 7:
1610       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1611       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1612       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1613       B->ops->mult            = MatMult_SeqSBAIJ_7;
1614       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1615       break;
1616     }
1617   }
1618 
1619   b->mbs = mbs;
1620   b->nbs = mbs;
1621   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1622   if (!nnz) {
1623     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1624     else if (nz <= 0)        nz = 1;
1625     for (i=0; i<mbs; i++) {
1626       b->imax[i] = nz;
1627     }
1628     nz = nz*mbs; /* total nz */
1629   } else {
1630     nz = 0;
1631     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1632   }
1633   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1634   s_nz = nz;
1635 
1636   /* allocate the matrix space */
1637   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1638   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1639   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1640   b->j = (int*)(b->a + s_nz*bs2);
1641   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1642   b->i = b->j + s_nz;
1643   b->singlemalloc = PETSC_TRUE;
1644 
1645   /* pointer to beginning of each row */
1646   b->i[0] = 0;
1647   for (i=1; i<mbs+1; i++) {
1648     b->i[i] = b->i[i-1] + b->imax[i-1];
1649   }
1650 
1651   /* b->ilen will count nonzeros in each block row so far. */
1652   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1653   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1654   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1655 
1656   b->bs               = bs;
1657   b->bs2              = bs2;
1658   b->s_nz             = 0;
1659   b->s_maxnz          = s_nz*bs2;
1660 
1661   b->inew             = 0;
1662   b->jnew             = 0;
1663   b->anew             = 0;
1664   b->a2anew           = 0;
1665   b->permute          = PETSC_FALSE;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatCreateSeqSBAIJ"
1672 /*@C
1673    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1674    compressed row) format.  For good matrix assembly performance the
1675    user should preallocate the matrix storage by setting the parameter nz
1676    (or the array nnz).  By setting these parameters accurately, performance
1677    during matrix assembly can be increased by more than a factor of 50.
1678 
1679    Collective on MPI_Comm
1680 
1681    Input Parameters:
1682 +  comm - MPI communicator, set to PETSC_COMM_SELF
1683 .  bs - size of block
1684 .  m - number of rows, or number of columns
1685 .  nz - number of block nonzeros per block row (same for all rows)
1686 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1687          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1688 
1689    Output Parameter:
1690 .  A - the symmetric matrix
1691 
1692    Options Database Keys:
1693 .   -mat_no_unroll - uses code that does not unroll the loops in the
1694                      block calculations (much slower)
1695 .    -mat_block_size - size of the blocks to use
1696 
1697    Level: intermediate
1698 
1699    Notes:
1700 
1701    Specify the preallocated storage with either nz or nnz (not both).
1702    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1703    allocation.  For additional details, see the users manual chapter on
1704    matrices.
1705 
1706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1707 @*/
1708 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1709 {
1710   int ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1714   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1715   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1721 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1722 {
1723   Mat         C;
1724   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1725   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1726 
1727   PetscFunctionBegin;
1728   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1729 
1730   *B = 0;
1731   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1732   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1733   c    = (Mat_SeqSBAIJ*)C->data;
1734 
1735   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1736   C->preallocated   = PETSC_TRUE;
1737   C->factor         = A->factor;
1738   c->row            = 0;
1739   c->icol           = 0;
1740   c->saved_values   = 0;
1741   c->keepzeroedrows = a->keepzeroedrows;
1742   C->assembled      = PETSC_TRUE;
1743 
1744   c->bs         = a->bs;
1745   c->bs2        = a->bs2;
1746   c->mbs        = a->mbs;
1747   c->nbs        = a->nbs;
1748 
1749   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1750   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1751   for (i=0; i<mbs; i++) {
1752     c->imax[i] = a->imax[i];
1753     c->ilen[i] = a->ilen[i];
1754   }
1755 
1756   /* allocate the matrix space */
1757   c->singlemalloc = PETSC_TRUE;
1758   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1759   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1760   c->j = (int*)(c->a + nz*bs2);
1761   c->i = c->j + nz;
1762   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1763   if (mbs > 0) {
1764     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1765     if (cpvalues == MAT_COPY_VALUES) {
1766       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1767     } else {
1768       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1769     }
1770   }
1771 
1772   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1773   c->sorted      = a->sorted;
1774   c->roworiented = a->roworiented;
1775   c->nonew       = a->nonew;
1776 
1777   if (a->diag) {
1778     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1779     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1780     for (i=0; i<mbs; i++) {
1781       c->diag[i] = a->diag[i];
1782     }
1783   } else c->diag        = 0;
1784   c->s_nz               = a->s_nz;
1785   c->s_maxnz            = a->s_maxnz;
1786   c->solve_work         = 0;
1787   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1788   c->mult_work          = 0;
1789   *B = C;
1790   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 EXTERN_C_BEGIN
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1797 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1798 {
1799   Mat_SeqSBAIJ *a;
1800   Mat          B;
1801   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1802   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1803   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1804   int          *masked,nmask,tmp,bs2,ishift;
1805   PetscScalar  *aa;
1806   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1810   bs2  = bs*bs;
1811 
1812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1813   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1814   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1815   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1816   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1817   M = header[1]; N = header[2]; nz = header[3];
1818 
1819   if (header[3] < 0) {
1820     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1821   }
1822 
1823   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1824 
1825   /*
1826      This code adds extra rows to make sure the number of rows is
1827     divisible by the blocksize
1828   */
1829   mbs        = M/bs;
1830   extra_rows = bs - M + bs*(mbs);
1831   if (extra_rows == bs) extra_rows = 0;
1832   else                  mbs++;
1833   if (extra_rows) {
1834     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1835   }
1836 
1837   /* read in row lengths */
1838   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1839   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1840   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1841 
1842   /* read in column indices */
1843   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1844   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1845   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1846 
1847   /* loop over row lengths determining block row lengths */
1848   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1849   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1850   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1851   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1852   masked   = mask + mbs;
1853   rowcount = 0; nzcount = 0;
1854   for (i=0; i<mbs; i++) {
1855     nmask = 0;
1856     for (j=0; j<bs; j++) {
1857       kmax = rowlengths[rowcount];
1858       for (k=0; k<kmax; k++) {
1859         tmp = jj[nzcount++]/bs;   /* block col. index */
1860         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1861       }
1862       rowcount++;
1863     }
1864     s_browlengths[i] += nmask;
1865 
1866     /* zero out the mask elements we set */
1867     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1868   }
1869 
1870   /* create our matrix */
1871   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1872   B = *A;
1873   a = (Mat_SeqSBAIJ*)B->data;
1874 
1875   /* set matrix "i" values */
1876   a->i[0] = 0;
1877   for (i=1; i<= mbs; i++) {
1878     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1879     a->ilen[i-1] = s_browlengths[i-1];
1880   }
1881   a->s_nz = a->i[mbs];
1882 
1883   /* read in nonzero values */
1884   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1885   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1886   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1887 
1888   /* set "a" and "j" values into matrix */
1889   nzcount = 0; jcount = 0;
1890   for (i=0; i<mbs; i++) {
1891     nzcountb = nzcount;
1892     nmask    = 0;
1893     for (j=0; j<bs; j++) {
1894       kmax = rowlengths[i*bs+j];
1895       for (k=0; k<kmax; k++) {
1896         tmp = jj[nzcount++]/bs; /* block col. index */
1897         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1898       }
1899     }
1900     /* sort the masked values */
1901     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1902 
1903     /* set "j" values into matrix */
1904     maskcount = 1;
1905     for (j=0; j<nmask; j++) {
1906       a->j[jcount++]  = masked[j];
1907       mask[masked[j]] = maskcount++;
1908     }
1909 
1910     /* set "a" values into matrix */
1911     ishift = bs2*a->i[i];
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp       = jj[nzcountb]/bs ; /* block col. index */
1916         if (tmp >= i){
1917           block     = mask[tmp] - 1;
1918           point     = jj[nzcountb] - bs*tmp;
1919           idx       = ishift + bs2*block + j + bs*point;
1920           a->a[idx] = aa[nzcountb];
1921         }
1922         nzcountb++;
1923       }
1924     }
1925     /* zero out the mask elements we set */
1926     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1927   }
1928   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1929 
1930   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1931   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1932   ierr = PetscFree(aa);CHKERRQ(ierr);
1933   ierr = PetscFree(jj);CHKERRQ(ierr);
1934   ierr = PetscFree(mask);CHKERRQ(ierr);
1935 
1936   B->assembled = PETSC_TRUE;
1937   ierr = MatView_Private(B);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 EXTERN_C_END
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1944 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1945 {
1946   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1947   MatScalar    *aa=a->a,*v,*v1;
1948   PetscScalar  *x,*b,*t,sum,d;
1949   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1950   int          nz,nz1,*vj,*vj1,i;
1951 
1952   PetscFunctionBegin;
1953   its = its*lits;
1954   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1955 
1956   if (bs > 1)
1957     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1958 
1959   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1960   if (xx != bb) {
1961     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1962   } else {
1963     b = x;
1964   }
1965 
1966   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1967 
1968   if (flag & SOR_ZERO_INITIAL_GUESS) {
1969     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1970       for (i=0; i<m; i++)
1971         t[i] = b[i];
1972 
1973       for (i=0; i<m; i++){
1974         d  = *(aa + ai[i]);  /* diag[i] */
1975         v  = aa + ai[i] + 1;
1976         vj = aj + ai[i] + 1;
1977         nz = ai[i+1] - ai[i] - 1;
1978         x[i] = omega*t[i]/d;
1979         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1980         PetscLogFlops(2*nz-1);
1981       }
1982     }
1983 
1984     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1985       for (i=0; i<m; i++)
1986         t[i] = b[i];
1987 
1988       for (i=0; i<m-1; i++){  /* update rhs */
1989         v  = aa + ai[i] + 1;
1990         vj = aj + ai[i] + 1;
1991         nz = ai[i+1] - ai[i] - 1;
1992         while (nz--) t[*vj++] -= x[i]*(*v++);
1993         PetscLogFlops(2*nz-1);
1994       }
1995       for (i=m-1; i>=0; i--){
1996         d  = *(aa + ai[i]);
1997         v  = aa + ai[i] + 1;
1998         vj = aj + ai[i] + 1;
1999         nz = ai[i+1] - ai[i] - 1;
2000         sum = t[i];
2001         while (nz--) sum -= x[*vj++]*(*v++);
2002         PetscLogFlops(2*nz-1);
2003         x[i] =   (1-omega)*x[i] + omega*sum/d;
2004       }
2005     }
2006     its--;
2007   }
2008 
2009   while (its--) {
2010     /*
2011        forward sweep:
2012        for i=0,...,m-1:
2013          sum[i] = (b[i] - U(i,:)x )/d[i];
2014          x[i]   = (1-omega)x[i] + omega*sum[i];
2015          b      = b - x[i]*U^T(i,:);
2016 
2017     */
2018     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2019       for (i=0; i<m; i++)
2020         t[i] = b[i];
2021 
2022       for (i=0; i<m; i++){
2023         d  = *(aa + ai[i]);  /* diag[i] */
2024         v  = aa + ai[i] + 1; v1=v;
2025         vj = aj + ai[i] + 1; vj1=vj;
2026         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2027         sum = t[i];
2028         while (nz1--) sum -= (*v1++)*x[*vj1++];
2029         x[i] = (1-omega)*x[i] + omega*sum/d;
2030         while (nz--) t[*vj++] -= x[i]*(*v++);
2031         PetscLogFlops(4*nz-2);
2032       }
2033     }
2034 
2035   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2036       /*
2037        backward sweep:
2038        b = b - x[i]*U^T(i,:), i=0,...,n-2
2039        for i=m-1,...,0:
2040          sum[i] = (b[i] - U(i,:)x )/d[i];
2041          x[i]   = (1-omega)x[i] + omega*sum[i];
2042       */
2043       for (i=0; i<m; i++)
2044         t[i] = b[i];
2045 
2046       for (i=0; i<m-1; i++){  /* update rhs */
2047         v  = aa + ai[i] + 1;
2048         vj = aj + ai[i] + 1;
2049         nz = ai[i+1] - ai[i] - 1;
2050         while (nz--) t[*vj++] -= x[i]*(*v++);
2051         PetscLogFlops(2*nz-1);
2052       }
2053       for (i=m-1; i>=0; i--){
2054         d  = *(aa + ai[i]);
2055         v  = aa + ai[i] + 1;
2056         vj = aj + ai[i] + 1;
2057         nz = ai[i+1] - ai[i] - 1;
2058         sum = t[i];
2059         while (nz--) sum -= x[*vj++]*(*v++);
2060         PetscLogFlops(2*nz-1);
2061         x[i] =   (1-omega)*x[i] + omega*sum/d;
2062       }
2063     }
2064   }
2065 
2066   ierr = PetscFree(t); CHKERRQ(ierr);
2067   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2068   if (bb != xx) {
2069     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2070   }
2071 
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 
2076 
2077 
2078 
2079 
2080