xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision fb10cecf1927e102c85c17a39b04f58da85ca3ce)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_SYMMETRIC:
183   case MAT_STRUCTURALLY_SYMMETRIC:
184     break;
185   default:
186     SETERRQ(PETSC_ERR_SUP,"unknown option");
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
193 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
194 {
195   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
196   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
197   MatScalar    *aa,*aa_i;
198   PetscScalar  *v_i;
199 
200   PetscFunctionBegin;
201   bs  = a->bs;
202   ai  = a->i;
203   aj  = a->j;
204   aa  = a->a;
205   bs2 = a->bs2;
206 
207   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
208 
209   bn  = row/bs;   /* Block number */
210   bp  = row % bs; /* Block position */
211   M   = ai[bn+1] - ai[bn];
212   *ncols = bs*M;
213 
214   if (v) {
215     *v = 0;
216     if (*ncols) {
217       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
218       for (i=0; i<M; i++) { /* for each block in the block row */
219         v_i  = *v + i*bs;
220         aa_i = aa + bs2*(ai[bn] + i);
221         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
222       }
223     }
224   }
225 
226   if (cols) {
227     *cols = 0;
228     if (*ncols) {
229       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
230       for (i=0; i<M; i++) { /* for each block in the block row */
231         cols_i = *cols + i*bs;
232         itmp  = bs*aj[ai[bn] + i];
233         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
234       }
235     }
236   }
237 
238   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
239   /* this segment is currently removed, so only entries in the upper triangle are obtained */
240 #ifdef column_search
241   v_i    = *v    + M*bs;
242   cols_i = *cols + M*bs;
243   for (i=0; i<bn; i++){ /* for each block row */
244     M = ai[i+1] - ai[i];
245     for (j=0; j<M; j++){
246       itmp = aj[ai[i] + j];    /* block column value */
247       if (itmp == bn){
248         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
249         for (k=0; k<bs; k++) {
250           *cols_i++ = i*bs+k;
251           *v_i++    = aa_i[k];
252         }
253         *ncols += bs;
254         break;
255       }
256     }
257   }
258 #endif
259 
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
265 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
266 {
267   int ierr;
268 
269   PetscFunctionBegin;
270   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
271   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
277 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
278 {
279   int ierr;
280   PetscFunctionBegin;
281   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
287 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
288 {
289   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
290   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
291   PetscScalar  *aa;
292   FILE         *file;
293 
294   PetscFunctionBegin;
295   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
296   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
297   col_lens[0] = MAT_FILE_COOKIE;
298 
299   col_lens[1] = A->m;
300   col_lens[2] = A->m;
301   col_lens[3] = a->s_nz*bs2;
302 
303   /* store lengths of each row and write (including header) to file */
304   count = 0;
305   for (i=0; i<a->mbs; i++) {
306     for (j=0; j<bs; j++) {
307       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
308     }
309   }
310   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
311   ierr = PetscFree(col_lens);CHKERRQ(ierr);
312 
313   /* store column indices (zero start index) */
314   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
315   count = 0;
316   for (i=0; i<a->mbs; i++) {
317     for (j=0; j<bs; j++) {
318       for (k=a->i[i]; k<a->i[i+1]; k++) {
319         for (l=0; l<bs; l++) {
320           jj[count++] = bs*a->j[k] + l;
321         }
322       }
323     }
324   }
325   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
326   ierr = PetscFree(jj);CHKERRQ(ierr);
327 
328   /* store nonzero values */
329   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
330   count = 0;
331   for (i=0; i<a->mbs; i++) {
332     for (j=0; j<bs; j++) {
333       for (k=a->i[i]; k<a->i[i+1]; k++) {
334         for (l=0; l<bs; l++) {
335           aa[count++] = a->a[bs2*k + l*bs + j];
336         }
337       }
338     }
339   }
340   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
341   ierr = PetscFree(aa);CHKERRQ(ierr);
342 
343   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
344   if (file) {
345     fprintf(file,"-matload_block_size %d\n",a->bs);
346   }
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
352 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
353 {
354   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
355   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
356   char              *name;
357   PetscViewerFormat format;
358 
359   PetscFunctionBegin;
360   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
361   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
362   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
363     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
364   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
365     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
366   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
367     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
368     for (i=0; i<a->mbs; i++) {
369       for (j=0; j<bs; j++) {
370         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
371         for (k=a->i[i]; k<a->i[i+1]; k++) {
372           for (l=0; l<bs; l++) {
373 #if defined(PETSC_USE_COMPLEX)
374             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
379                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
380             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
382             }
383 #else
384             if (a->a[bs2*k + l*bs + j] != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
386             }
387 #endif
388           }
389         }
390         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
391       }
392     }
393     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
394   } else {
395     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
396     for (i=0; i<a->mbs; i++) {
397       for (j=0; j<bs; j++) {
398         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
399         for (k=a->i[i]; k<a->i[i+1]; k++) {
400           for (l=0; l<bs; l++) {
401 #if defined(PETSC_USE_COMPLEX)
402             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
407                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
408             } else {
409               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
410             }
411 #else
412             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
413 #endif
414           }
415         }
416         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
417       }
418     }
419     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
420   }
421   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
427 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
428 {
429   Mat          A = (Mat) Aa;
430   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
431   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
432   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
433   MatScalar    *aa;
434   MPI_Comm     comm;
435   PetscViewer  viewer;
436 
437   PetscFunctionBegin;
438   /*
439       This is nasty. If this is called from an originally parallel matrix
440    then all processes call this,but only the first has the matrix so the
441    rest should return immediately.
442   */
443   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
444   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
445   if (rank) PetscFunctionReturn(0);
446 
447   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
448 
449   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
450   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
451 
452   /* loop over matrix elements drawing boxes */
453   color = PETSC_DRAW_BLUE;
454   for (i=0,row=0; i<mbs; i++,row+=bs) {
455     for (j=a->i[i]; j<a->i[i+1]; j++) {
456       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
457       x_l = a->j[j]*bs; x_r = x_l + 1.0;
458       aa = a->a + j*bs2;
459       for (k=0; k<bs; k++) {
460         for (l=0; l<bs; l++) {
461           if (PetscRealPart(*aa++) >=  0.) continue;
462           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
463         }
464       }
465     }
466   }
467   color = PETSC_DRAW_CYAN;
468   for (i=0,row=0; i<mbs; i++,row+=bs) {
469     for (j=a->i[i]; j<a->i[i+1]; j++) {
470       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
471       x_l = a->j[j]*bs; x_r = x_l + 1.0;
472       aa = a->a + j*bs2;
473       for (k=0; k<bs; k++) {
474         for (l=0; l<bs; l++) {
475           if (PetscRealPart(*aa++) != 0.) continue;
476           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
477         }
478       }
479     }
480   }
481 
482   color = PETSC_DRAW_RED;
483   for (i=0,row=0; i<mbs; i++,row+=bs) {
484     for (j=a->i[i]; j<a->i[i+1]; j++) {
485       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
486       x_l = a->j[j]*bs; x_r = x_l + 1.0;
487       aa = a->a + j*bs2;
488       for (k=0; k<bs; k++) {
489         for (l=0; l<bs; l++) {
490           if (PetscRealPart(*aa++) <= 0.) continue;
491           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
492         }
493       }
494     }
495   }
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
501 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
502 {
503   int          ierr;
504   PetscReal    xl,yl,xr,yr,w,h;
505   PetscDraw    draw;
506   PetscTruth   isnull;
507 
508   PetscFunctionBegin;
509 
510   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
511   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
512 
513   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
514   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
515   xr += w;    yr += h;  xl = -w;     yl = -h;
516   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
517   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
518   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatView_SeqSBAIJ"
524 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
525 {
526   int        ierr;
527   PetscTruth issocket,isascii,isbinary,isdraw;
528 
529   PetscFunctionBegin;
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
534   if (issocket) {
535     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
536   } else if (isascii){
537     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
538   } else if (isbinary) {
539     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
540   } else if (isdraw) {
541     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
542   } else {
543     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
551 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
552 {
553   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
554   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
555   int        *ai = a->i,*ailen = a->ilen;
556   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
557   MatScalar  *ap,*aa = a->a,zero = 0.0;
558 
559   PetscFunctionBegin;
560   for (k=0; k<m; k++) { /* loop over rows */
561     row  = im[k]; brow = row/bs;
562     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
563     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
564     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
565     nrow = ailen[brow];
566     for (l=0; l<n; l++) { /* loop over columns */
567       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
568       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
569       col  = in[l] ;
570       bcol = col/bs;
571       cidx = col%bs;
572       ridx = row%bs;
573       high = nrow;
574       low  = 0; /* assume unsorted */
575       while (high-low > 5) {
576         t = (low+high)/2;
577         if (rp[t] > bcol) high = t;
578         else             low  = t;
579       }
580       for (i=low; i<high; i++) {
581         if (rp[i] > bcol) break;
582         if (rp[i] == bcol) {
583           *v++ = ap[bs2*i+bs*cidx+ridx];
584           goto finished;
585         }
586       }
587       *v++ = zero;
588       finished:;
589     }
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
598 {
599   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
600   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
601   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
602   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
603   PetscTruth      roworiented=a->roworiented;
604   const MatScalar *value = v;
605   MatScalar       *ap,*aa = a->a,*bap;
606 
607   PetscFunctionBegin;
608   if (roworiented) {
609     stepval = (n-1)*bs;
610   } else {
611     stepval = (m-1)*bs;
612   }
613   for (k=0; k<m; k++) { /* loop over added rows */
614     row  = im[k];
615     if (row < 0) continue;
616 #if defined(PETSC_USE_BOPT_g)
617     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
618 #endif
619     rp   = aj + ai[row];
620     ap   = aa + bs2*ai[row];
621     rmax = imax[row];
622     nrow = ailen[row];
623     low  = 0;
624     for (l=0; l<n; l++) { /* loop over added columns */
625       if (in[l] < 0) continue;
626       col = in[l];
627 #if defined(PETSC_USE_BOPT_g)
628       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
629 #endif
630       if (col < row) continue; /* ignore lower triangular block */
631       if (roworiented) {
632         value = v + k*(stepval+bs)*bs + l*bs;
633       } else {
634         value = v + l*(stepval+bs)*bs + k*bs;
635       }
636       if (!sorted) low = 0; high = nrow;
637       while (high-low > 7) {
638         t = (low+high)/2;
639         if (rp[t] > col) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > col) break;
644         if (rp[i] == col) {
645           bap  = ap +  bs2*i;
646           if (roworiented) {
647             if (is == ADD_VALUES) {
648               for (ii=0; ii<bs; ii++,value+=stepval) {
649                 for (jj=ii; jj<bs2; jj+=bs) {
650                   bap[jj] += *value++;
651                 }
652               }
653             } else {
654               for (ii=0; ii<bs; ii++,value+=stepval) {
655                 for (jj=ii; jj<bs2; jj+=bs) {
656                   bap[jj] = *value++;
657                 }
658               }
659             }
660           } else {
661             if (is == ADD_VALUES) {
662               for (ii=0; ii<bs; ii++,value+=stepval) {
663                 for (jj=0; jj<bs; jj++) {
664                   *bap++ += *value++;
665                 }
666               }
667             } else {
668               for (ii=0; ii<bs; ii++,value+=stepval) {
669                 for (jj=0; jj<bs; jj++) {
670                   *bap++  = *value++;
671                 }
672               }
673             }
674           }
675           goto noinsert2;
676         }
677       }
678       if (nonew == 1) goto noinsert2;
679       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
680       if (nrow >= rmax) {
681         /* there is no extra room in row, therefore enlarge */
682         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
683         MatScalar *new_a;
684 
685         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
686 
687         /* malloc new storage space */
688         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
689 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
690         new_j   = (int*)(new_a + bs2*new_nz);
691         new_i   = new_j + new_nz;
692 
693         /* copy over old data into new slots */
694         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
695         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
696         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
697         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
698         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
699         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
700         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
701         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
702         /* free up old matrix storage */
703         ierr = PetscFree(a->a);CHKERRQ(ierr);
704         if (!a->singlemalloc) {
705           ierr = PetscFree(a->i);CHKERRQ(ierr);
706           ierr = PetscFree(a->j);CHKERRQ(ierr);
707         }
708         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
709         a->singlemalloc = PETSC_TRUE;
710 
711         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
712         rmax = imax[row] = imax[row] + CHUNKSIZE;
713         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
714         a->s_maxnz += bs2*CHUNKSIZE;
715         a->reallocs++;
716         a->s_nz++;
717       }
718       N = nrow++ - 1;
719       /* shift up all the later entries in this row */
720       for (ii=N; ii>=i; ii--) {
721         rp[ii+1] = rp[ii];
722         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       if (N >= i) {
725         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726       }
727       rp[i] = col;
728       bap   = ap +  bs2*i;
729       if (roworiented) {
730         for (ii=0; ii<bs; ii++,value+=stepval) {
731           for (jj=ii; jj<bs2; jj+=bs) {
732             bap[jj] = *value++;
733           }
734         }
735       } else {
736         for (ii=0; ii<bs; ii++,value+=stepval) {
737           for (jj=0; jj<bs; jj++) {
738             *bap++  = *value++;
739           }
740         }
741       }
742       noinsert2:;
743       low = i;
744     }
745     ailen[row] = nrow;
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
753 {
754   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
755   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
756   int        m = A->m,*ip,N,*ailen = a->ilen;
757   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
758   MatScalar  *aa = a->a,*ap;
759 
760   PetscFunctionBegin;
761   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
762 
763   if (m) rmax = ailen[0];
764   for (i=1; i<mbs; i++) {
765     /* move each row back by the amount of empty slots (fshift) before it*/
766     fshift += imax[i-1] - ailen[i-1];
767     rmax   = PetscMax(rmax,ailen[i]);
768     if (fshift) {
769       ip = aj + ai[i]; ap = aa + bs2*ai[i];
770       N = ailen[i];
771       for (j=0; j<N; j++) {
772         ip[j-fshift] = ip[j];
773         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
774       }
775     }
776     ai[i] = ai[i-1] + ailen[i-1];
777   }
778   if (mbs) {
779     fshift += imax[mbs-1] - ailen[mbs-1];
780     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
781   }
782   /* reset ilen and imax for each row */
783   for (i=0; i<mbs; i++) {
784     ailen[i] = imax[i] = ai[i+1] - ai[i];
785   }
786   a->s_nz = ai[mbs];
787 
788   /* diagonals may have moved, reset it */
789   if (a->diag) {
790     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
791   }
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
793            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
795            a->reallocs);
796   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
797   a->reallocs          = 0;
798   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
799 
800   PetscFunctionReturn(0);
801 }
802 
803 /*
804    This function returns an array of flags which indicate the locations of contiguous
805    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
806    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
807    Assume: sizes should be long enough to hold all the values.
808 */
809 #undef __FUNCT__
810 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
811 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
812 {
813   int        i,j,k,row;
814   PetscTruth flg;
815 
816   PetscFunctionBegin;
817   for (i=0,j=0; i<n; j++) {
818     row = idx[i];
819     if (row%bs!=0) { /* Not the begining of a block */
820       sizes[j] = 1;
821       i++;
822     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
823       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
824       i++;
825     } else { /* Begining of the block, so check if the complete block exists */
826       flg = PETSC_TRUE;
827       for (k=1; k<bs; k++) {
828         if (row+k != idx[i+k]) { /* break in the block */
829           flg = PETSC_FALSE;
830           break;
831         }
832       }
833       if (flg == PETSC_TRUE) { /* No break in the bs */
834         sizes[j] = bs;
835         i+= bs;
836       } else {
837         sizes[j] = 1;
838         i++;
839       }
840     }
841   }
842   *bs_max = j;
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
848 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
849 {
850   PetscFunctionBegin;
851   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
852 }
853 
854 /* Only add/insert a(i,j) with i<=j (blocks).
855    Any a(i,j) with i>j input by user is ingored.
856 */
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
860 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
861 {
862   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
863   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
864   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
865   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
866   int         ridx,cidx,bs2=a->bs2,ierr;
867   MatScalar   *ap,value,*aa=a->a,*bap;
868 
869   PetscFunctionBegin;
870 
871   for (k=0; k<m; k++) { /* loop over added rows */
872     row  = im[k];       /* row number */
873     brow = row/bs;      /* block row number */
874     if (row < 0) continue;
875 #if defined(PETSC_USE_BOPT_g)
876     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
877 #endif
878     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
879     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
880     rmax = imax[brow];  /* maximum space allocated for this row */
881     nrow = ailen[brow]; /* actual length of this row */
882     low  = 0;
883 
884     for (l=0; l<n; l++) { /* loop over added columns */
885       if (in[l] < 0) continue;
886 #if defined(PETSC_USE_BOPT_g)
887       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
888 #endif
889       col = in[l];
890       bcol = col/bs;              /* block col number */
891 
892       if (brow <= bcol){
893         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
894         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
895           /* element value a(k,l) */
896           if (roworiented) {
897             value = v[l + k*n];
898           } else {
899             value = v[k + l*m];
900           }
901 
902           /* move pointer bap to a(k,l) quickly and add/insert value */
903           if (!sorted) low = 0; high = nrow;
904           while (high-low > 7) {
905             t = (low+high)/2;
906             if (rp[t] > bcol) high = t;
907             else              low  = t;
908           }
909           for (i=low; i<high; i++) {
910             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
911             if (rp[i] > bcol) break;
912             if (rp[i] == bcol) {
913               bap  = ap +  bs2*i + bs*cidx + ridx;
914               if (is == ADD_VALUES) *bap += value;
915               else                  *bap  = value;
916               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
917               if (brow == bcol && ridx < cidx){
918                 bap  = ap +  bs2*i + bs*ridx + cidx;
919                 if (is == ADD_VALUES) *bap += value;
920                 else                  *bap  = value;
921               }
922               goto noinsert1;
923             }
924           }
925 
926       if (nonew == 1) goto noinsert1;
927       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
928       if (nrow >= rmax) {
929         /* there is no extra room in row, therefore enlarge */
930         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
931         MatScalar *new_a;
932 
933         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
934 
935         /* Malloc new storage space */
936         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
937         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
938         new_j = (int*)(new_a + bs2*new_nz);
939         new_i = new_j + new_nz;
940 
941         /* copy over old data into new slots */
942         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
943         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
944         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
945         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
946         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
947         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
948         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
949         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
950         /* free up old matrix storage */
951         ierr = PetscFree(a->a);CHKERRQ(ierr);
952         if (!a->singlemalloc) {
953           ierr = PetscFree(a->i);CHKERRQ(ierr);
954           ierr = PetscFree(a->j);CHKERRQ(ierr);
955         }
956         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
957         a->singlemalloc = PETSC_TRUE;
958 
959         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
960         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
961         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
962         a->s_maxnz += bs2*CHUNKSIZE;
963         a->reallocs++;
964         a->s_nz++;
965       }
966 
967       N = nrow++ - 1;
968       /* shift up all the later entries in this row */
969       for (ii=N; ii>=i; ii--) {
970         rp[ii+1] = rp[ii];
971         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
972       }
973       if (N>=i) {
974         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
975       }
976       rp[i]                      = bcol;
977       ap[bs2*i + bs*cidx + ridx] = value;
978       noinsert1:;
979       low = i;
980       /* } */
981         }
982       } /* end of if .. if..  */
983     }                     /* end of loop over added columns */
984     ailen[brow] = nrow;
985   }                       /* end of loop over added rows */
986 
987   PetscFunctionReturn(0);
988 }
989 
990 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
991 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
992 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
993 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
994 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
995 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
996 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
997 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
998 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
999 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1000 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1001 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1002 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1003 extern int MatZeroEntries_SeqSBAIJ(Mat);
1004 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1005 
1006 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1007 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1008 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1009 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1014 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1015 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1016 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1021 
1022 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1023 
1024 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1025 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1032 
1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1041 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1042 
1043 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1051 
1052 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1060 
1061 #ifdef HAVE_MatICCFactor
1062 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1065 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1066 {
1067   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1068   Mat         outA;
1069   int         ierr;
1070   PetscTruth  row_identity,col_identity;
1071 
1072   PetscFunctionBegin;
1073   /*
1074   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1075   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1076   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1077   if (!row_identity || !col_identity) {
1078     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1079   }
1080   */
1081 
1082   outA          = inA;
1083   inA->factor   = FACTOR_CHOLESKY;
1084 
1085   if (!a->diag) {
1086     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1087   }
1088   /*
1089       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1090       for ILU(0) factorization with natural ordering
1091   */
1092   switch (a->bs) {
1093   case 1:
1094     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1095     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1096     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1097     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1098   case 2:
1099     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1100     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1101     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1102     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1103     break;
1104   case 3:
1105     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1106     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1107     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1108     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1109     break;
1110   case 4:
1111     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1112     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1113     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1114     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1115     break;
1116   case 5:
1117     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1118     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1119     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1120     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1121     break;
1122   case 6:
1123     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1124     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1125     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1126     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1127     break;
1128   case 7:
1129     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1130     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1132     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1133     break;
1134   default:
1135     a->row        = row;
1136     a->icol       = col;
1137     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1138     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1139 
1140     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1141     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1142     PetscLogObjectParent(inA,a->icol);
1143 
1144     if (!a->solve_work) {
1145       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1146       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1147     }
1148   }
1149 
1150   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1151 
1152   PetscFunctionReturn(0);
1153 }
1154 #endif
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1158 int MatPrintHelp_SeqSBAIJ(Mat A)
1159 {
1160   static PetscTruth called = PETSC_FALSE;
1161   MPI_Comm          comm = A->comm;
1162   int               ierr;
1163 
1164   PetscFunctionBegin;
1165   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1166   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1167   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1174 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1175 {
1176   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1177   int         i,nz,n;
1178 
1179   PetscFunctionBegin;
1180   nz = baij->s_maxnz;
1181   n  = mat->n;
1182   for (i=0; i<nz; i++) {
1183     baij->j[i] = indices[i];
1184   }
1185   baij->s_nz = nz;
1186   for (i=0; i<n; i++) {
1187     baij->ilen[i] = baij->imax[i];
1188   }
1189 
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1196 /*@
1197     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1198        in the matrix.
1199 
1200   Input Parameters:
1201 +  mat     - the SeqSBAIJ matrix
1202 -  indices - the column indices
1203 
1204   Level: advanced
1205 
1206   Notes:
1207     This can be called if you have precomputed the nonzero structure of the
1208   matrix and want to provide it to the matrix object to improve the performance
1209   of the MatSetValues() operation.
1210 
1211     You MUST have set the correct numbers of nonzeros per row in the call to
1212   MatCreateSeqSBAIJ().
1213 
1214     MUST be called before any calls to MatSetValues()
1215 
1216 .seealso: MatCreateSeqSBAIJ
1217 @*/
1218 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1219 {
1220   int ierr,(*f)(Mat,int *);
1221 
1222   PetscFunctionBegin;
1223   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1224   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1225   if (f) {
1226     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1227   } else {
1228     SETERRQ(1,"Wrong type of matrix to set column indices");
1229   }
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1235 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1236 {
1237   int        ierr;
1238 
1239   PetscFunctionBegin;
1240   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1246 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1247 {
1248   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1249   PetscFunctionBegin;
1250   *array = a->a;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1256 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1257 {
1258   PetscFunctionBegin;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #include "petscblaslapack.h"
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1265 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1266 {
1267   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1268   int          ierr,one=1,i,bs=y->bs,bs2,j;
1269 
1270   PetscFunctionBegin;
1271   if (str == SAME_NONZERO_PATTERN) {
1272     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1273   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1274     if (y->xtoy && y->XtoY != X) {
1275       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1276       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1277     }
1278     if (!y->xtoy) { /* get xtoy */
1279       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1280       y->XtoY = X;
1281     }
1282     bs2 = bs*bs;
1283     for (i=0; i<x->s_nz; i++) {
1284       j = 0;
1285       while (j < bs2){
1286         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1287         j++;
1288       }
1289     }
1290     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1291   } else {
1292     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /* -------------------------------------------------------------------*/
1298 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1299        MatGetRow_SeqSBAIJ,
1300        MatRestoreRow_SeqSBAIJ,
1301        MatMult_SeqSBAIJ_N,
1302 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1303        MatMultTranspose_SeqSBAIJ,
1304        MatMultTransposeAdd_SeqSBAIJ,
1305        MatSolve_SeqSBAIJ_N,
1306        0,
1307        0,
1308 /*10*/ 0,
1309        0,
1310        MatCholeskyFactor_SeqSBAIJ,
1311        MatRelax_SeqSBAIJ,
1312        MatTranspose_SeqSBAIJ,
1313 /*15*/ MatGetInfo_SeqSBAIJ,
1314        MatEqual_SeqSBAIJ,
1315        MatGetDiagonal_SeqSBAIJ,
1316        MatDiagonalScale_SeqSBAIJ,
1317        MatNorm_SeqSBAIJ,
1318 /*20*/ 0,
1319        MatAssemblyEnd_SeqSBAIJ,
1320        0,
1321        MatSetOption_SeqSBAIJ,
1322        MatZeroEntries_SeqSBAIJ,
1323 /*25*/ MatZeroRows_SeqSBAIJ,
1324        0,
1325        0,
1326        MatCholeskyFactorSymbolic_SeqSBAIJ,
1327        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1328 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1329        0,
1330        MatICCFactorSymbolic_SeqSBAIJ,
1331        MatGetArray_SeqSBAIJ,
1332        MatRestoreArray_SeqSBAIJ,
1333 /*35*/ MatDuplicate_SeqSBAIJ,
1334        0,
1335        0,
1336        0,
1337        0,
1338 /*40*/ MatAXPY_SeqSBAIJ,
1339        MatGetSubMatrices_SeqSBAIJ,
1340        MatIncreaseOverlap_SeqSBAIJ,
1341        MatGetValues_SeqSBAIJ,
1342        0,
1343 /*45*/ MatPrintHelp_SeqSBAIJ,
1344        MatScale_SeqSBAIJ,
1345        0,
1346        0,
1347        0,
1348 /*50*/ MatGetBlockSize_SeqSBAIJ,
1349        MatGetRowIJ_SeqSBAIJ,
1350        MatRestoreRowIJ_SeqSBAIJ,
1351        0,
1352        0,
1353 /*55*/ 0,
1354        0,
1355        0,
1356        0,
1357        MatSetValuesBlocked_SeqSBAIJ,
1358 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1359        0,
1360        0,
1361        MatGetPetscMaps_Petsc,
1362        0,
1363 /*65*/ 0,
1364        0,
1365        0,
1366        0,
1367        0,
1368 /*70*/ MatGetRowMax_SeqSBAIJ,
1369        0,
1370        0,
1371        0,
1372        0,
1373 /*75*/ 0,
1374        0,
1375        0,
1376        0,
1377        0,
1378 /*80*/ 0,
1379        0,
1380        0,
1381        0,
1382 #if !defined(PETSC_USE_COMPLEX)
1383        MatGetInertia_SeqSBAIJ,
1384 #else
1385        0,
1386 #endif
1387 /*85*/ MatLoad_SeqSBAIJ
1388 };
1389 
1390 EXTERN_C_BEGIN
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1393 int MatStoreValues_SeqSBAIJ(Mat mat)
1394 {
1395   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1396   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1397   int         ierr;
1398 
1399   PetscFunctionBegin;
1400   if (aij->nonew != 1) {
1401     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1402   }
1403 
1404   /* allocate space for values if not already there */
1405   if (!aij->saved_values) {
1406     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1407   }
1408 
1409   /* copy values over */
1410   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 EXTERN_C_END
1414 
1415 EXTERN_C_BEGIN
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1418 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1419 {
1420   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1421   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1422 
1423   PetscFunctionBegin;
1424   if (aij->nonew != 1) {
1425     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1426   }
1427   if (!aij->saved_values) {
1428     SETERRQ(1,"Must call MatStoreValues(A);first");
1429   }
1430 
1431   /* copy values over */
1432   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 EXTERN_C_END
1436 
1437 EXTERN_C_BEGIN
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1440 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1441 {
1442   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1443   int          i,len,ierr,mbs,bs2;
1444   PetscTruth   flg;
1445   int          s_nz;
1446 
1447   PetscFunctionBegin;
1448   B->preallocated = PETSC_TRUE;
1449   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1450   mbs  = B->m/bs;
1451   bs2  = bs*bs;
1452 
1453   if (mbs*bs != B->m) {
1454     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1455   }
1456 
1457   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1458   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1459   if (nnz) {
1460     for (i=0; i<mbs; i++) {
1461       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1462       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1463     }
1464   }
1465 
1466   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1467   if (!flg) {
1468     switch (bs) {
1469     case 1:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1471       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1472       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1473       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1474       B->ops->mult            = MatMult_SeqSBAIJ_1;
1475       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1476       break;
1477     case 2:
1478       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1479       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1480       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1481       B->ops->mult            = MatMult_SeqSBAIJ_2;
1482       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1483       break;
1484     case 3:
1485       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1486       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1487       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1488       B->ops->mult            = MatMult_SeqSBAIJ_3;
1489       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1490       break;
1491     case 4:
1492       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1493       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1494       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1495       B->ops->mult            = MatMult_SeqSBAIJ_4;
1496       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1497       break;
1498     case 5:
1499       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1500       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1501       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1502       B->ops->mult            = MatMult_SeqSBAIJ_5;
1503       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1504       break;
1505     case 6:
1506       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1507       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1508       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1509       B->ops->mult            = MatMult_SeqSBAIJ_6;
1510       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1511       break;
1512     case 7:
1513       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1514       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1515       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1516       B->ops->mult            = MatMult_SeqSBAIJ_7;
1517       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1518       break;
1519     }
1520   }
1521 
1522   b->mbs = mbs;
1523   b->nbs = mbs;
1524   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1525   if (!nnz) {
1526     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1527     else if (nz <= 0)        nz = 1;
1528     for (i=0; i<mbs; i++) {
1529       b->imax[i] = nz;
1530     }
1531     nz = nz*mbs; /* total nz */
1532   } else {
1533     nz = 0;
1534     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1535   }
1536   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1537   s_nz = nz;
1538 
1539   /* allocate the matrix space */
1540   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1541   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1542   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1543   b->j = (int*)(b->a + s_nz*bs2);
1544   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1545   b->i = b->j + s_nz;
1546   b->singlemalloc = PETSC_TRUE;
1547 
1548   /* pointer to beginning of each row */
1549   b->i[0] = 0;
1550   for (i=1; i<mbs+1; i++) {
1551     b->i[i] = b->i[i-1] + b->imax[i-1];
1552   }
1553 
1554   /* b->ilen will count nonzeros in each block row so far. */
1555   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1556   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1557   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1558 
1559   b->bs               = bs;
1560   b->bs2              = bs2;
1561   b->s_nz             = 0;
1562   b->s_maxnz          = s_nz*bs2;
1563 
1564   b->inew             = 0;
1565   b->jnew             = 0;
1566   b->anew             = 0;
1567   b->a2anew           = 0;
1568   b->permute          = PETSC_FALSE;
1569   PetscFunctionReturn(0);
1570 }
1571 EXTERN_C_END
1572 
1573 /*MC
1574    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1575    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1576 
1577    Options Database Keys:
1578 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1579 
1580   Level: beginner
1581 
1582 .seealso: MatCreateSeqSBAIJ
1583 M*/
1584 
1585 EXTERN_C_BEGIN
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1588 int MatCreate_SeqSBAIJ(Mat B)
1589 {
1590   Mat_SeqSBAIJ *b;
1591   int          ierr,size;
1592 
1593   PetscFunctionBegin;
1594   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1595   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1596   B->m = B->M = PetscMax(B->m,B->M);
1597   B->n = B->N = PetscMax(B->n,B->N);
1598 
1599   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1600   B->data = (void*)b;
1601   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1602   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1603   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1604   B->ops->view        = MatView_SeqSBAIJ;
1605   B->factor           = 0;
1606   B->lupivotthreshold = 1.0;
1607   B->mapping          = 0;
1608   b->row              = 0;
1609   b->icol             = 0;
1610   b->reallocs         = 0;
1611   b->saved_values     = 0;
1612 
1613   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1614   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1615 
1616   b->sorted           = PETSC_FALSE;
1617   b->roworiented      = PETSC_TRUE;
1618   b->nonew            = 0;
1619   b->diag             = 0;
1620   b->solve_work       = 0;
1621   b->mult_work        = 0;
1622   B->spptr            = 0;
1623   b->keepzeroedrows   = PETSC_FALSE;
1624   b->xtoy             = 0;
1625   b->XtoY             = 0;
1626 
1627   b->inew             = 0;
1628   b->jnew             = 0;
1629   b->anew             = 0;
1630   b->a2anew           = 0;
1631   b->permute          = PETSC_FALSE;
1632 
1633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1634                                      "MatStoreValues_SeqSBAIJ",
1635                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1637                                      "MatRetrieveValues_SeqSBAIJ",
1638                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1640                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1641                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1643                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1644                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1645   PetscFunctionReturn(0);
1646 }
1647 EXTERN_C_END
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1651 /*@C
1652    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1653    compressed row) format.  For good matrix assembly performance the
1654    user should preallocate the matrix storage by setting the parameter nz
1655    (or the array nnz).  By setting these parameters accurately, performance
1656    during matrix assembly can be increased by more than a factor of 50.
1657 
1658    Collective on Mat
1659 
1660    Input Parameters:
1661 +  A - the symmetric matrix
1662 .  bs - size of block
1663 .  nz - number of block nonzeros per block row (same for all rows)
1664 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1665          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1666 
1667    Options Database Keys:
1668 .   -mat_no_unroll - uses code that does not unroll the loops in the
1669                      block calculations (much slower)
1670 .    -mat_block_size - size of the blocks to use
1671 
1672    Level: intermediate
1673 
1674    Notes:
1675    Specify the preallocated storage with either nz or nnz (not both).
1676    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1677    allocation.  For additional details, see the users manual chapter on
1678    matrices.
1679 
1680 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1681 @*/
1682 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1683   int ierr,(*f)(Mat,int,int,const int[]);
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1687   if (f) {
1688     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1689   }
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatCreateSeqSBAIJ"
1695 /*@C
1696    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1697    compressed row) format.  For good matrix assembly performance the
1698    user should preallocate the matrix storage by setting the parameter nz
1699    (or the array nnz).  By setting these parameters accurately, performance
1700    during matrix assembly can be increased by more than a factor of 50.
1701 
1702    Collective on MPI_Comm
1703 
1704    Input Parameters:
1705 +  comm - MPI communicator, set to PETSC_COMM_SELF
1706 .  bs - size of block
1707 .  m - number of rows, or number of columns
1708 .  nz - number of block nonzeros per block row (same for all rows)
1709 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1710          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1711 
1712    Output Parameter:
1713 .  A - the symmetric matrix
1714 
1715    Options Database Keys:
1716 .   -mat_no_unroll - uses code that does not unroll the loops in the
1717                      block calculations (much slower)
1718 .    -mat_block_size - size of the blocks to use
1719 
1720    Level: intermediate
1721 
1722    Notes:
1723 
1724    Specify the preallocated storage with either nz or nnz (not both).
1725    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1726    allocation.  For additional details, see the users manual chapter on
1727    matrices.
1728 
1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1730 @*/
1731 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1732 {
1733   int ierr;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1737   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1738   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1744 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1745 {
1746   Mat         C;
1747   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1748   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1749 
1750   PetscFunctionBegin;
1751   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1752 
1753   *B = 0;
1754   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1755   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1756   c    = (Mat_SeqSBAIJ*)C->data;
1757 
1758   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1759   C->preallocated   = PETSC_TRUE;
1760   C->factor         = A->factor;
1761   c->row            = 0;
1762   c->icol           = 0;
1763   c->saved_values   = 0;
1764   c->keepzeroedrows = a->keepzeroedrows;
1765   C->assembled      = PETSC_TRUE;
1766 
1767   c->bs         = a->bs;
1768   c->bs2        = a->bs2;
1769   c->mbs        = a->mbs;
1770   c->nbs        = a->nbs;
1771 
1772   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1773   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1774   for (i=0; i<mbs; i++) {
1775     c->imax[i] = a->imax[i];
1776     c->ilen[i] = a->ilen[i];
1777   }
1778 
1779   /* allocate the matrix space */
1780   c->singlemalloc = PETSC_TRUE;
1781   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1782   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1783   c->j = (int*)(c->a + nz*bs2);
1784   c->i = c->j + nz;
1785   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1786   if (mbs > 0) {
1787     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1788     if (cpvalues == MAT_COPY_VALUES) {
1789       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1790     } else {
1791       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1792     }
1793   }
1794 
1795   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1796   c->sorted      = a->sorted;
1797   c->roworiented = a->roworiented;
1798   c->nonew       = a->nonew;
1799 
1800   if (a->diag) {
1801     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1802     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1803     for (i=0; i<mbs; i++) {
1804       c->diag[i] = a->diag[i];
1805     }
1806   } else c->diag        = 0;
1807   c->s_nz               = a->s_nz;
1808   c->s_maxnz            = a->s_maxnz;
1809   c->solve_work         = 0;
1810   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1811   c->mult_work          = 0;
1812   *B = C;
1813   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1819 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1820 {
1821   Mat_SeqSBAIJ *a;
1822   Mat          B;
1823   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1824   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1825   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1826   int          *masked,nmask,tmp,bs2,ishift;
1827   PetscScalar  *aa;
1828   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1829 
1830   PetscFunctionBegin;
1831   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1832   bs2  = bs*bs;
1833 
1834   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1835   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1836   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1837   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1838   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1839   M = header[1]; N = header[2]; nz = header[3];
1840 
1841   if (header[3] < 0) {
1842     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1843   }
1844 
1845   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1846 
1847   /*
1848      This code adds extra rows to make sure the number of rows is
1849     divisible by the blocksize
1850   */
1851   mbs        = M/bs;
1852   extra_rows = bs - M + bs*(mbs);
1853   if (extra_rows == bs) extra_rows = 0;
1854   else                  mbs++;
1855   if (extra_rows) {
1856     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1857   }
1858 
1859   /* read in row lengths */
1860   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1861   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1862   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1863 
1864   /* read in column indices */
1865   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1866   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1867   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1868 
1869   /* loop over row lengths determining block row lengths */
1870   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1871   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1872   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1873   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1874   masked   = mask + mbs;
1875   rowcount = 0; nzcount = 0;
1876   for (i=0; i<mbs; i++) {
1877     nmask = 0;
1878     for (j=0; j<bs; j++) {
1879       kmax = rowlengths[rowcount];
1880       for (k=0; k<kmax; k++) {
1881         tmp = jj[nzcount++]/bs;   /* block col. index */
1882         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1883       }
1884       rowcount++;
1885     }
1886     s_browlengths[i] += nmask;
1887 
1888     /* zero out the mask elements we set */
1889     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1890   }
1891 
1892   /* create our matrix */
1893   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1894   ierr = MatSetType(B,type);CHKERRQ(ierr);
1895   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1896   a = (Mat_SeqSBAIJ*)B->data;
1897 
1898   /* set matrix "i" values */
1899   a->i[0] = 0;
1900   for (i=1; i<= mbs; i++) {
1901     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1902     a->ilen[i-1] = s_browlengths[i-1];
1903   }
1904   a->s_nz = a->i[mbs];
1905 
1906   /* read in nonzero values */
1907   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1908   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1909   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1910 
1911   /* set "a" and "j" values into matrix */
1912   nzcount = 0; jcount = 0;
1913   for (i=0; i<mbs; i++) {
1914     nzcountb = nzcount;
1915     nmask    = 0;
1916     for (j=0; j<bs; j++) {
1917       kmax = rowlengths[i*bs+j];
1918       for (k=0; k<kmax; k++) {
1919         tmp = jj[nzcount++]/bs; /* block col. index */
1920         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1921       }
1922     }
1923     /* sort the masked values */
1924     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1925 
1926     /* set "j" values into matrix */
1927     maskcount = 1;
1928     for (j=0; j<nmask; j++) {
1929       a->j[jcount++]  = masked[j];
1930       mask[masked[j]] = maskcount++;
1931     }
1932 
1933     /* set "a" values into matrix */
1934     ishift = bs2*a->i[i];
1935     for (j=0; j<bs; j++) {
1936       kmax = rowlengths[i*bs+j];
1937       for (k=0; k<kmax; k++) {
1938         tmp       = jj[nzcountb]/bs ; /* block col. index */
1939         if (tmp >= i){
1940           block     = mask[tmp] - 1;
1941           point     = jj[nzcountb] - bs*tmp;
1942           idx       = ishift + bs2*block + j + bs*point;
1943           a->a[idx] = aa[nzcountb];
1944         }
1945         nzcountb++;
1946       }
1947     }
1948     /* zero out the mask elements we set */
1949     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1950   }
1951   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1952 
1953   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1954   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1955   ierr = PetscFree(aa);CHKERRQ(ierr);
1956   ierr = PetscFree(jj);CHKERRQ(ierr);
1957   ierr = PetscFree(mask);CHKERRQ(ierr);
1958 
1959   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   ierr = MatView_Private(B);CHKERRQ(ierr);
1962   *A = B;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1968 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1969 {
1970   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1971   MatScalar    *aa=a->a,*v,*v1;
1972   PetscScalar  *x,*b,*t,sum,d;
1973   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1974   int          nz,nz1,*vj,*vj1,i;
1975 
1976   PetscFunctionBegin;
1977   its = its*lits;
1978   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1979 
1980   if (bs > 1)
1981     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1982 
1983   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1984   if (xx != bb) {
1985     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1986   } else {
1987     b = x;
1988   }
1989 
1990   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1991 
1992   if (flag & SOR_ZERO_INITIAL_GUESS) {
1993     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1994       for (i=0; i<m; i++)
1995         t[i] = b[i];
1996 
1997       for (i=0; i<m; i++){
1998         d  = *(aa + ai[i]);  /* diag[i] */
1999         v  = aa + ai[i] + 1;
2000         vj = aj + ai[i] + 1;
2001         nz = ai[i+1] - ai[i] - 1;
2002         x[i] = omega*t[i]/d;
2003         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2004         PetscLogFlops(2*nz-1);
2005       }
2006     }
2007 
2008     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2009       for (i=0; i<m; i++)
2010         t[i] = b[i];
2011 
2012       for (i=0; i<m-1; i++){  /* update rhs */
2013         v  = aa + ai[i] + 1;
2014         vj = aj + ai[i] + 1;
2015         nz = ai[i+1] - ai[i] - 1;
2016         while (nz--) t[*vj++] -= x[i]*(*v++);
2017         PetscLogFlops(2*nz-1);
2018       }
2019       for (i=m-1; i>=0; i--){
2020         d  = *(aa + ai[i]);
2021         v  = aa + ai[i] + 1;
2022         vj = aj + ai[i] + 1;
2023         nz = ai[i+1] - ai[i] - 1;
2024         sum = t[i];
2025         while (nz--) sum -= x[*vj++]*(*v++);
2026         PetscLogFlops(2*nz-1);
2027         x[i] =   (1-omega)*x[i] + omega*sum/d;
2028       }
2029     }
2030     its--;
2031   }
2032 
2033   while (its--) {
2034     /*
2035        forward sweep:
2036        for i=0,...,m-1:
2037          sum[i] = (b[i] - U(i,:)x )/d[i];
2038          x[i]   = (1-omega)x[i] + omega*sum[i];
2039          b      = b - x[i]*U^T(i,:);
2040 
2041     */
2042     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2043       for (i=0; i<m; i++)
2044         t[i] = b[i];
2045 
2046       for (i=0; i<m; i++){
2047         d  = *(aa + ai[i]);  /* diag[i] */
2048         v  = aa + ai[i] + 1; v1=v;
2049         vj = aj + ai[i] + 1; vj1=vj;
2050         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2051         sum = t[i];
2052         while (nz1--) sum -= (*v1++)*x[*vj1++];
2053         x[i] = (1-omega)*x[i] + omega*sum/d;
2054         while (nz--) t[*vj++] -= x[i]*(*v++);
2055         PetscLogFlops(4*nz-2);
2056       }
2057     }
2058 
2059   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2060       /*
2061        backward sweep:
2062        b = b - x[i]*U^T(i,:), i=0,...,n-2
2063        for i=m-1,...,0:
2064          sum[i] = (b[i] - U(i,:)x )/d[i];
2065          x[i]   = (1-omega)x[i] + omega*sum[i];
2066       */
2067       for (i=0; i<m; i++)
2068         t[i] = b[i];
2069 
2070       for (i=0; i<m-1; i++){  /* update rhs */
2071         v  = aa + ai[i] + 1;
2072         vj = aj + ai[i] + 1;
2073         nz = ai[i+1] - ai[i] - 1;
2074         while (nz--) t[*vj++] -= x[i]*(*v++);
2075         PetscLogFlops(2*nz-1);
2076       }
2077       for (i=m-1; i>=0; i--){
2078         d  = *(aa + ai[i]);
2079         v  = aa + ai[i] + 1;
2080         vj = aj + ai[i] + 1;
2081         nz = ai[i+1] - ai[i] - 1;
2082         sum = t[i];
2083         while (nz--) sum -= x[*vj++]*(*v++);
2084         PetscLogFlops(2*nz-1);
2085         x[i] =   (1-omega)*x[i] + omega*sum/d;
2086       }
2087     }
2088   }
2089 
2090   ierr = PetscFree(t); CHKERRQ(ierr);
2091   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2092   if (bb != xx) {
2093     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2094   }
2095 
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 
2100 
2101 
2102 
2103 
2104