xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision e32c5f6eb58f4e49912dd3fbd0ec2f3434bff94f)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
560     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
565       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
595 {
596   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
597   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth  roworiented=a->roworiented;
601   MatScalar   *value = v,*ap,*aa = a->a,*bap;
602 
603   PetscFunctionBegin;
604   if (roworiented) {
605     stepval = (n-1)*bs;
606   } else {
607     stepval = (m-1)*bs;
608   }
609   for (k=0; k<m; k++) { /* loop over added rows */
610     row  = im[k];
611     if (row < 0) continue;
612 #if defined(PETSC_USE_BOPT_g)
613     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
614 #endif
615     rp   = aj + ai[row];
616     ap   = aa + bs2*ai[row];
617     rmax = imax[row];
618     nrow = ailen[row];
619     low  = 0;
620     for (l=0; l<n; l++) { /* loop over added columns */
621       if (in[l] < 0) continue;
622 #if defined(PETSC_USE_BOPT_g)
623       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
624 #endif
625       col = in[l];
626       if (col < row) continue; /* ignore lower triangular block */
627       if (roworiented) {
628         value = v + k*(stepval+bs)*bs + l*bs;
629       } else {
630         value = v + l*(stepval+bs)*bs + k*bs;
631       }
632       if (!sorted) low = 0; high = nrow;
633       while (high-low > 7) {
634         t = (low+high)/2;
635         if (rp[t] > col) high = t;
636         else             low  = t;
637       }
638       for (i=low; i<high; i++) {
639         if (rp[i] > col) break;
640         if (rp[i] == col) {
641           bap  = ap +  bs2*i;
642           if (roworiented) {
643             if (is == ADD_VALUES) {
644               for (ii=0; ii<bs; ii++,value+=stepval) {
645                 for (jj=ii; jj<bs2; jj+=bs) {
646                   bap[jj] += *value++;
647                 }
648               }
649             } else {
650               for (ii=0; ii<bs; ii++,value+=stepval) {
651                 for (jj=ii; jj<bs2; jj+=bs) {
652                   bap[jj] = *value++;
653                 }
654               }
655             }
656           } else {
657             if (is == ADD_VALUES) {
658               for (ii=0; ii<bs; ii++,value+=stepval) {
659                 for (jj=0; jj<bs; jj++) {
660                   *bap++ += *value++;
661                 }
662               }
663             } else {
664               for (ii=0; ii<bs; ii++,value+=stepval) {
665                 for (jj=0; jj<bs; jj++) {
666                   *bap++  = *value++;
667                 }
668               }
669             }
670           }
671           goto noinsert2;
672         }
673       }
674       if (nonew == 1) goto noinsert2;
675       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
676       if (nrow >= rmax) {
677         /* there is no extra room in row, therefore enlarge */
678         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
679         MatScalar *new_a;
680 
681         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
682 
683         /* malloc new storage space */
684         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
685 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
686         new_j   = (int*)(new_a + bs2*new_nz);
687         new_i   = new_j + new_nz;
688 
689         /* copy over old data into new slots */
690         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
691         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
692         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
693         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
694         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
695         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
696         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
698         /* free up old matrix storage */
699         ierr = PetscFree(a->a);CHKERRQ(ierr);
700         if (!a->singlemalloc) {
701           ierr = PetscFree(a->i);CHKERRQ(ierr);
702           ierr = PetscFree(a->j);CHKERRQ(ierr);
703         }
704         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
705         a->singlemalloc = PETSC_TRUE;
706 
707         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
708         rmax = imax[row] = imax[row] + CHUNKSIZE;
709         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
710         a->s_maxnz += bs2*CHUNKSIZE;
711         a->reallocs++;
712         a->s_nz++;
713       }
714       N = nrow++ - 1;
715       /* shift up all the later entries in this row */
716       for (ii=N; ii>=i; ii--) {
717         rp[ii+1] = rp[ii];
718         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
719       }
720       if (N >= i) {
721         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
722       }
723       rp[i] = col;
724       bap   = ap +  bs2*i;
725       if (roworiented) {
726         for (ii=0; ii<bs; ii++,value+=stepval) {
727           for (jj=ii; jj<bs2; jj+=bs) {
728             bap[jj] = *value++;
729           }
730         }
731       } else {
732         for (ii=0; ii<bs; ii++,value+=stepval) {
733           for (jj=0; jj<bs; jj++) {
734             *bap++  = *value++;
735           }
736         }
737       }
738       noinsert2:;
739       low = i;
740     }
741     ailen[row] = nrow;
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
749 {
750   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
751   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
752   int        m = A->m,*ip,N,*ailen = a->ilen;
753   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
754   MatScalar  *aa = a->a,*ap;
755 #if defined(PETSC_HAVE_SPOOLES)
756   PetscTruth   flag;
757 #endif
758 
759   PetscFunctionBegin;
760   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
761 
762   if (m) rmax = ailen[0];
763   for (i=1; i<mbs; i++) {
764     /* move each row back by the amount of empty slots (fshift) before it*/
765     fshift += imax[i-1] - ailen[i-1];
766     rmax   = PetscMax(rmax,ailen[i]);
767     if (fshift) {
768       ip = aj + ai[i]; ap = aa + bs2*ai[i];
769       N = ailen[i];
770       for (j=0; j<N; j++) {
771         ip[j-fshift] = ip[j];
772         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
773       }
774     }
775     ai[i] = ai[i-1] + ailen[i-1];
776   }
777   if (mbs) {
778     fshift += imax[mbs-1] - ailen[mbs-1];
779     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
780   }
781   /* reset ilen and imax for each row */
782   for (i=0; i<mbs; i++) {
783     ailen[i] = imax[i] = ai[i+1] - ai[i];
784   }
785   a->s_nz = ai[mbs];
786 
787   /* diagonals may have moved, reset it */
788   if (a->diag) {
789     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
790   }
791   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
792            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
793   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
794            a->reallocs);
795   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
796   a->reallocs          = 0;
797   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
798 
799 #if defined(PETSC_HAVE_SPOOLES)
800   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
801   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
802 #endif
803 
804   PetscFunctionReturn(0);
805 }
806 
807 /*
808    This function returns an array of flags which indicate the locations of contiguous
809    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
810    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
811    Assume: sizes should be long enough to hold all the values.
812 */
813 #undef __FUNCT__
814 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
815 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
816 {
817   int        i,j,k,row;
818   PetscTruth flg;
819 
820   PetscFunctionBegin;
821   for (i=0,j=0; i<n; j++) {
822     row = idx[i];
823     if (row%bs!=0) { /* Not the begining of a block */
824       sizes[j] = 1;
825       i++;
826     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
827       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
828       i++;
829     } else { /* Begining of the block, so check if the complete block exists */
830       flg = PETSC_TRUE;
831       for (k=1; k<bs; k++) {
832         if (row+k != idx[i+k]) { /* break in the block */
833           flg = PETSC_FALSE;
834           break;
835         }
836       }
837       if (flg == PETSC_TRUE) { /* No break in the bs */
838         sizes[j] = bs;
839         i+= bs;
840       } else {
841         sizes[j] = 1;
842         i++;
843       }
844     }
845   }
846   *bs_max = j;
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
852 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
853 {
854   PetscFunctionBegin;
855   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
856 }
857 
858 /* Only add/insert a(i,j) with i<=j (blocks).
859    Any a(i,j) with i>j input by user is ingored.
860 */
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
864 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
865 {
866   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
867   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
868   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
869   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
870   int         ridx,cidx,bs2=a->bs2,ierr;
871   MatScalar   *ap,value,*aa=a->a,*bap;
872 
873   PetscFunctionBegin;
874 
875   for (k=0; k<m; k++) { /* loop over added rows */
876     row  = im[k];       /* row number */
877     brow = row/bs;      /* block row number */
878     if (row < 0) continue;
879 #if defined(PETSC_USE_BOPT_g)
880     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
881 #endif
882     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
883     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
884     rmax = imax[brow];  /* maximum space allocated for this row */
885     nrow = ailen[brow]; /* actual length of this row */
886     low  = 0;
887 
888     for (l=0; l<n; l++) { /* loop over added columns */
889       if (in[l] < 0) continue;
890 #if defined(PETSC_USE_BOPT_g)
891       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
892 #endif
893       col = in[l];
894       bcol = col/bs;              /* block col number */
895 
896       if (brow <= bcol){
897         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
898         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
899           /* element value a(k,l) */
900           if (roworiented) {
901             value = v[l + k*n];
902           } else {
903             value = v[k + l*m];
904           }
905 
906           /* move pointer bap to a(k,l) quickly and add/insert value */
907           if (!sorted) low = 0; high = nrow;
908           while (high-low > 7) {
909             t = (low+high)/2;
910             if (rp[t] > bcol) high = t;
911             else              low  = t;
912           }
913           for (i=low; i<high; i++) {
914             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
915             if (rp[i] > bcol) break;
916             if (rp[i] == bcol) {
917               bap  = ap +  bs2*i + bs*cidx + ridx;
918               if (is == ADD_VALUES) *bap += value;
919               else                  *bap  = value;
920               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
921               if (brow == bcol && ridx < cidx){
922                 bap  = ap +  bs2*i + bs*ridx + cidx;
923                 if (is == ADD_VALUES) *bap += value;
924                 else                  *bap  = value;
925               }
926               goto noinsert1;
927             }
928           }
929 
930       if (nonew == 1) goto noinsert1;
931       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
932       if (nrow >= rmax) {
933         /* there is no extra room in row, therefore enlarge */
934         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
935         MatScalar *new_a;
936 
937         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
938 
939         /* Malloc new storage space */
940         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
941         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
942         new_j = (int*)(new_a + bs2*new_nz);
943         new_i = new_j + new_nz;
944 
945         /* copy over old data into new slots */
946         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
947         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
948         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
949         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
950         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
951         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
952         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
953         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
954         /* free up old matrix storage */
955         ierr = PetscFree(a->a);CHKERRQ(ierr);
956         if (!a->singlemalloc) {
957           ierr = PetscFree(a->i);CHKERRQ(ierr);
958           ierr = PetscFree(a->j);CHKERRQ(ierr);
959         }
960         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
961         a->singlemalloc = PETSC_TRUE;
962 
963         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
964         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
965         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
966         a->s_maxnz += bs2*CHUNKSIZE;
967         a->reallocs++;
968         a->s_nz++;
969       }
970 
971       N = nrow++ - 1;
972       /* shift up all the later entries in this row */
973       for (ii=N; ii>=i; ii--) {
974         rp[ii+1] = rp[ii];
975         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
976       }
977       if (N>=i) {
978         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
979       }
980       rp[i]                      = bcol;
981       ap[bs2*i + bs*cidx + ridx] = value;
982       noinsert1:;
983       low = i;
984       /* } */
985         }
986       } /* end of if .. if..  */
987     }                     /* end of loop over added columns */
988     ailen[brow] = nrow;
989   }                       /* end of loop over added rows */
990 
991   PetscFunctionReturn(0);
992 }
993 
994 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
995 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
996 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
997 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
998 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
999 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1000 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1001 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1002 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1003 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1004 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1005 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1006 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1007 extern int MatZeroEntries_SeqSBAIJ(Mat);
1008 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1009 
1010 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1025 
1026 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1027 
1028 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1036 
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1045 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1046 
1047 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1055 
1056 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1064 
1065 #ifdef HAVE_MatICCFactor
1066 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1069 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1070 {
1071   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1072   Mat         outA;
1073   int         ierr;
1074   PetscTruth  row_identity,col_identity;
1075 
1076   PetscFunctionBegin;
1077   /*
1078   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1079   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1080   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1081   if (!row_identity || !col_identity) {
1082     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1083   }
1084   */
1085 
1086   outA          = inA;
1087   inA->factor   = FACTOR_CHOLESKY;
1088 
1089   if (!a->diag) {
1090     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1091   }
1092   /*
1093       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1094       for ILU(0) factorization with natural ordering
1095   */
1096   switch (a->bs) {
1097   case 1:
1098     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1099     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1100     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1101     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1102   case 2:
1103     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1104     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1105     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1106     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1107     break;
1108   case 3:
1109     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1110     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1111     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1112     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1113     break;
1114   case 4:
1115     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1116     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1117     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1118     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1119     break;
1120   case 5:
1121     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1122     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1123     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1124     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1125     break;
1126   case 6:
1127     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1128     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1129     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1130     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1131     break;
1132   case 7:
1133     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1134     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1135     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1136     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1137     break;
1138   default:
1139     a->row        = row;
1140     a->icol       = col;
1141     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1142     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1143 
1144     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1145     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1146     PetscLogObjectParent(inA,a->icol);
1147 
1148     if (!a->solve_work) {
1149       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1150       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1151     }
1152   }
1153 
1154   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1155 
1156   PetscFunctionReturn(0);
1157 }
1158 #endif
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1162 int MatPrintHelp_SeqSBAIJ(Mat A)
1163 {
1164   static PetscTruth called = PETSC_FALSE;
1165   MPI_Comm          comm = A->comm;
1166   int               ierr;
1167 
1168   PetscFunctionBegin;
1169   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1170   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1171   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 EXTERN_C_BEGIN
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1178 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1179 {
1180   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1181   int         i,nz,n;
1182 
1183   PetscFunctionBegin;
1184   nz = baij->s_maxnz;
1185   n  = mat->n;
1186   for (i=0; i<nz; i++) {
1187     baij->j[i] = indices[i];
1188   }
1189   baij->s_nz = nz;
1190   for (i=0; i<n; i++) {
1191     baij->ilen[i] = baij->imax[i];
1192   }
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1200 /*@
1201     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1202        in the matrix.
1203 
1204   Input Parameters:
1205 +  mat     - the SeqSBAIJ matrix
1206 -  indices - the column indices
1207 
1208   Level: advanced
1209 
1210   Notes:
1211     This can be called if you have precomputed the nonzero structure of the
1212   matrix and want to provide it to the matrix object to improve the performance
1213   of the MatSetValues() operation.
1214 
1215     You MUST have set the correct numbers of nonzeros per row in the call to
1216   MatCreateSeqSBAIJ().
1217 
1218     MUST be called before any calls to MatSetValues()
1219 
1220 .seealso: MatCreateSeqSBAIJ
1221 @*/
1222 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1223 {
1224   int ierr,(*f)(Mat,int *);
1225 
1226   PetscFunctionBegin;
1227   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1228   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1229   if (f) {
1230     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1231   } else {
1232     SETERRQ(1,"Wrong type of matrix to set column indices");
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1239 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1240 {
1241   int        ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1250 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array)
1251 {
1252   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1253   PetscFunctionBegin;
1254   *array = a->a;
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1260 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array)
1261 {
1262   PetscFunctionBegin;
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #include "petscblaslapack.h"
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1269 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1270 {
1271   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1272   int          ierr,one=1,i,bs=y->bs,bs2,j;
1273 
1274   PetscFunctionBegin;
1275   if (str == SAME_NONZERO_PATTERN) {
1276     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1277   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1278     if (y->xtoy && y->XtoY != X) {
1279       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1280       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1281     }
1282     if (!y->xtoy) { /* get xtoy */
1283       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1284       y->XtoY = X;
1285     }
1286     bs2 = bs*bs;
1287     for (i=0; i<x->s_nz; i++) {
1288       j = 0;
1289       while (j < bs2){
1290         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1291         j++;
1292       }
1293     }
1294     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1295   } else {
1296     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 /* -------------------------------------------------------------------*/
1302 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1303        MatGetRow_SeqSBAIJ,
1304        MatRestoreRow_SeqSBAIJ,
1305        MatMult_SeqSBAIJ_N,
1306        MatMultAdd_SeqSBAIJ_N,
1307        MatMultTranspose_SeqSBAIJ,
1308        MatMultTransposeAdd_SeqSBAIJ,
1309        MatSolve_SeqSBAIJ_N,
1310        0,
1311        0,
1312        0,
1313        0,
1314        MatCholeskyFactor_SeqSBAIJ,
1315        MatRelax_SeqSBAIJ,
1316        MatTranspose_SeqSBAIJ,
1317        MatGetInfo_SeqSBAIJ,
1318        MatEqual_SeqSBAIJ,
1319        MatGetDiagonal_SeqSBAIJ,
1320        MatDiagonalScale_SeqSBAIJ,
1321        MatNorm_SeqSBAIJ,
1322        0,
1323        MatAssemblyEnd_SeqSBAIJ,
1324        0,
1325        MatSetOption_SeqSBAIJ,
1326        MatZeroEntries_SeqSBAIJ,
1327        MatZeroRows_SeqSBAIJ,
1328        0,
1329        0,
1330        MatCholeskyFactorSymbolic_SeqSBAIJ,
1331        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1332        MatSetUpPreallocation_SeqSBAIJ,
1333        0,
1334        MatICCFactorSymbolic_SeqSBAIJ,
1335        MatGetArray_SeqSBAIJ,
1336        MatRestoreArray_SeqSBAIJ,
1337        MatDuplicate_SeqSBAIJ,
1338        0,
1339        0,
1340        0,
1341        0,
1342        MatAXPY_SeqSBAIJ,
1343        MatGetSubMatrices_SeqSBAIJ,
1344        MatIncreaseOverlap_SeqSBAIJ,
1345        MatGetValues_SeqSBAIJ,
1346        0,
1347        MatPrintHelp_SeqSBAIJ,
1348        MatScale_SeqSBAIJ,
1349        0,
1350        0,
1351        0,
1352        MatGetBlockSize_SeqSBAIJ,
1353        MatGetRowIJ_SeqSBAIJ,
1354        MatRestoreRowIJ_SeqSBAIJ,
1355        0,
1356        0,
1357        0,
1358        0,
1359        0,
1360        0,
1361        MatSetValuesBlocked_SeqSBAIJ,
1362        MatGetSubMatrix_SeqSBAIJ,
1363        0,
1364        0,
1365        MatGetPetscMaps_Petsc,
1366        0,
1367        0,
1368        0,
1369        0,
1370        0,
1371        0,
1372        MatGetRowMax_SeqSBAIJ,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386 #if !defined(PETSC_USE_COMPLEX)
1387        MatGetInertia_SeqSBAIJ
1388 #else
1389        0
1390 #endif
1391 };
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1396 int MatStoreValues_SeqSBAIJ(Mat mat)
1397 {
1398   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1399   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1400   int         ierr;
1401 
1402   PetscFunctionBegin;
1403   if (aij->nonew != 1) {
1404     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1405   }
1406 
1407   /* allocate space for values if not already there */
1408   if (!aij->saved_values) {
1409     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1421 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1422 {
1423   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1424   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1425 
1426   PetscFunctionBegin;
1427   if (aij->nonew != 1) {
1428     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1429   }
1430   if (!aij->saved_values) {
1431     SETERRQ(1,"Must call MatStoreValues(A);first");
1432   }
1433 
1434   /* copy values over */
1435   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 EXTERN_C_END
1439 
1440 EXTERN_C_BEGIN
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1443 int MatCreate_SeqSBAIJ(Mat B)
1444 {
1445   Mat_SeqSBAIJ *b;
1446   int          ierr,size;
1447 
1448   PetscFunctionBegin;
1449   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1450   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1451   B->m = B->M = PetscMax(B->m,B->M);
1452   B->n = B->N = PetscMax(B->n,B->N);
1453 
1454   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1455   B->data = (void*)b;
1456   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1457   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1458   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1459   B->ops->view        = MatView_SeqSBAIJ;
1460   B->factor           = 0;
1461   B->lupivotthreshold = 1.0;
1462   B->mapping          = 0;
1463   b->row              = 0;
1464   b->icol             = 0;
1465   b->reallocs         = 0;
1466   b->saved_values     = 0;
1467 
1468   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1469   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1470 
1471   b->sorted           = PETSC_FALSE;
1472   b->roworiented      = PETSC_TRUE;
1473   b->nonew            = 0;
1474   b->diag             = 0;
1475   b->solve_work       = 0;
1476   b->mult_work        = 0;
1477   B->spptr            = 0;
1478   b->keepzeroedrows   = PETSC_FALSE;
1479   b->xtoy             = 0;
1480   b->XtoY             = 0;
1481 
1482   b->inew             = 0;
1483   b->jnew             = 0;
1484   b->anew             = 0;
1485   b->a2anew           = 0;
1486   b->permute          = PETSC_FALSE;
1487 
1488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1489                                      "MatStoreValues_SeqSBAIJ",
1490                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1492                                      "MatRetrieveValues_SeqSBAIJ",
1493                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1495                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1496                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 EXTERN_C_END
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1503 /*@C
1504    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1505    compressed row) format.  For good matrix assembly performance the
1506    user should preallocate the matrix storage by setting the parameter nz
1507    (or the array nnz).  By setting these parameters accurately, performance
1508    during matrix assembly can be increased by more than a factor of 50.
1509 
1510    Collective on Mat
1511 
1512    Input Parameters:
1513 +  A - the symmetric matrix
1514 .  bs - size of block
1515 .  nz - number of block nonzeros per block row (same for all rows)
1516 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1517          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1518 
1519    Options Database Keys:
1520 .   -mat_no_unroll - uses code that does not unroll the loops in the
1521                      block calculations (much slower)
1522 .    -mat_block_size - size of the blocks to use
1523 
1524    Level: intermediate
1525 
1526    Notes:
1527    Specify the preallocated storage with either nz or nnz (not both).
1528    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1529    allocation.  For additional details, see the users manual chapter on
1530    matrices.
1531 
1532 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1533 @*/
1534 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1535 {
1536   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1537   int          i,len,ierr,mbs,bs2;
1538   PetscTruth   flg;
1539   int          s_nz;
1540 
1541   PetscFunctionBegin;
1542   B->preallocated = PETSC_TRUE;
1543   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1544   mbs  = B->m/bs;
1545   bs2  = bs*bs;
1546 
1547   if (mbs*bs != B->m) {
1548     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1549   }
1550 
1551   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1552   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1553   if (nnz) {
1554     for (i=0; i<mbs; i++) {
1555       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1556       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1557     }
1558   }
1559 
1560   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1561   if (!flg) {
1562     switch (bs) {
1563     case 1:
1564       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1565       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1566       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1567       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1568       B->ops->mult            = MatMult_SeqSBAIJ_1;
1569       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1570       break;
1571     case 2:
1572       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1573       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1574       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1575       B->ops->mult            = MatMult_SeqSBAIJ_2;
1576       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1577       break;
1578     case 3:
1579       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1580       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1581       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1582       B->ops->mult            = MatMult_SeqSBAIJ_3;
1583       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1584       break;
1585     case 4:
1586       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1587       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1588       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1589       B->ops->mult            = MatMult_SeqSBAIJ_4;
1590       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1591       break;
1592     case 5:
1593       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1594       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1595       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1596       B->ops->mult            = MatMult_SeqSBAIJ_5;
1597       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1598       break;
1599     case 6:
1600       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1601       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1602       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1603       B->ops->mult            = MatMult_SeqSBAIJ_6;
1604       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1605       break;
1606     case 7:
1607       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1608       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1609       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1610       B->ops->mult            = MatMult_SeqSBAIJ_7;
1611       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1612       break;
1613     }
1614   }
1615 
1616   b->mbs = mbs;
1617   b->nbs = mbs;
1618   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1619   if (!nnz) {
1620     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1621     else if (nz <= 0)        nz = 1;
1622     for (i=0; i<mbs; i++) {
1623       b->imax[i] = nz;
1624     }
1625     nz = nz*mbs; /* total nz */
1626   } else {
1627     nz = 0;
1628     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1629   }
1630   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1631   s_nz = nz;
1632 
1633   /* allocate the matrix space */
1634   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1635   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1636   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1637   b->j = (int*)(b->a + s_nz*bs2);
1638   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1639   b->i = b->j + s_nz;
1640   b->singlemalloc = PETSC_TRUE;
1641 
1642   /* pointer to beginning of each row */
1643   b->i[0] = 0;
1644   for (i=1; i<mbs+1; i++) {
1645     b->i[i] = b->i[i-1] + b->imax[i-1];
1646   }
1647 
1648   /* b->ilen will count nonzeros in each block row so far. */
1649   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1650   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1651   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1652 
1653   b->bs               = bs;
1654   b->bs2              = bs2;
1655   b->s_nz             = 0;
1656   b->s_maxnz          = s_nz*bs2;
1657 
1658   b->inew             = 0;
1659   b->jnew             = 0;
1660   b->anew             = 0;
1661   b->a2anew           = 0;
1662   b->permute          = PETSC_FALSE;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatCreateSeqSBAIJ"
1669 /*@C
1670    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1671    compressed row) format.  For good matrix assembly performance the
1672    user should preallocate the matrix storage by setting the parameter nz
1673    (or the array nnz).  By setting these parameters accurately, performance
1674    during matrix assembly can be increased by more than a factor of 50.
1675 
1676    Collective on MPI_Comm
1677 
1678    Input Parameters:
1679 +  comm - MPI communicator, set to PETSC_COMM_SELF
1680 .  bs - size of block
1681 .  m - number of rows, or number of columns
1682 .  nz - number of block nonzeros per block row (same for all rows)
1683 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1684          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1685 
1686    Output Parameter:
1687 .  A - the symmetric matrix
1688 
1689    Options Database Keys:
1690 .   -mat_no_unroll - uses code that does not unroll the loops in the
1691                      block calculations (much slower)
1692 .    -mat_block_size - size of the blocks to use
1693 
1694    Level: intermediate
1695 
1696    Notes:
1697 
1698    Specify the preallocated storage with either nz or nnz (not both).
1699    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1700    allocation.  For additional details, see the users manual chapter on
1701    matrices.
1702 
1703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1704 @*/
1705 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1706 {
1707   int ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1711   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1712   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1718 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1719 {
1720   Mat         C;
1721   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1722   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1723 
1724   PetscFunctionBegin;
1725   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1726 
1727   *B = 0;
1728   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1729   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1730   c    = (Mat_SeqSBAIJ*)C->data;
1731 
1732   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1733   C->preallocated   = PETSC_TRUE;
1734   C->factor         = A->factor;
1735   c->row            = 0;
1736   c->icol           = 0;
1737   c->saved_values   = 0;
1738   c->keepzeroedrows = a->keepzeroedrows;
1739   C->assembled      = PETSC_TRUE;
1740 
1741   c->bs         = a->bs;
1742   c->bs2        = a->bs2;
1743   c->mbs        = a->mbs;
1744   c->nbs        = a->nbs;
1745 
1746   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1747   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1748   for (i=0; i<mbs; i++) {
1749     c->imax[i] = a->imax[i];
1750     c->ilen[i] = a->ilen[i];
1751   }
1752 
1753   /* allocate the matrix space */
1754   c->singlemalloc = PETSC_TRUE;
1755   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1756   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1757   c->j = (int*)(c->a + nz*bs2);
1758   c->i = c->j + nz;
1759   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1760   if (mbs > 0) {
1761     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1762     if (cpvalues == MAT_COPY_VALUES) {
1763       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1764     } else {
1765       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1766     }
1767   }
1768 
1769   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1770   c->sorted      = a->sorted;
1771   c->roworiented = a->roworiented;
1772   c->nonew       = a->nonew;
1773 
1774   if (a->diag) {
1775     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1776     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1777     for (i=0; i<mbs; i++) {
1778       c->diag[i] = a->diag[i];
1779     }
1780   } else c->diag        = 0;
1781   c->s_nz               = a->s_nz;
1782   c->s_maxnz            = a->s_maxnz;
1783   c->solve_work         = 0;
1784   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1785   c->mult_work          = 0;
1786   *B = C;
1787   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 EXTERN_C_BEGIN
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1794 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1795 {
1796   Mat_SeqSBAIJ *a;
1797   Mat          B;
1798   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1799   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1800   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1801   int          *masked,nmask,tmp,bs2,ishift;
1802   PetscScalar  *aa;
1803   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1804 #if defined(PETSC_HAVE_SPOOLES)
1805   PetscTruth   flag;
1806 #endif
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1810   bs2  = bs*bs;
1811 
1812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1813   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1814   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1815   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1816   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1817   M = header[1]; N = header[2]; nz = header[3];
1818 
1819   if (header[3] < 0) {
1820     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1821   }
1822 
1823   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1824 
1825   /*
1826      This code adds extra rows to make sure the number of rows is
1827     divisible by the blocksize
1828   */
1829   mbs        = M/bs;
1830   extra_rows = bs - M + bs*(mbs);
1831   if (extra_rows == bs) extra_rows = 0;
1832   else                  mbs++;
1833   if (extra_rows) {
1834     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1835   }
1836 
1837   /* read in row lengths */
1838   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1839   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1840   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1841 
1842   /* read in column indices */
1843   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1844   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1845   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1846 
1847   /* loop over row lengths determining block row lengths */
1848   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1849   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1850   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1851   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1852   masked   = mask + mbs;
1853   rowcount = 0; nzcount = 0;
1854   for (i=0; i<mbs; i++) {
1855     nmask = 0;
1856     for (j=0; j<bs; j++) {
1857       kmax = rowlengths[rowcount];
1858       for (k=0; k<kmax; k++) {
1859         tmp = jj[nzcount++]/bs;   /* block col. index */
1860         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1861       }
1862       rowcount++;
1863     }
1864     s_browlengths[i] += nmask;
1865 
1866     /* zero out the mask elements we set */
1867     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1868   }
1869 
1870   /* create our matrix */
1871   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1872   B = *A;
1873   a = (Mat_SeqSBAIJ*)B->data;
1874 
1875   /* set matrix "i" values */
1876   a->i[0] = 0;
1877   for (i=1; i<= mbs; i++) {
1878     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1879     a->ilen[i-1] = s_browlengths[i-1];
1880   }
1881   a->s_nz = a->i[mbs];
1882 
1883   /* read in nonzero values */
1884   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1885   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1886   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1887 
1888   /* set "a" and "j" values into matrix */
1889   nzcount = 0; jcount = 0;
1890   for (i=0; i<mbs; i++) {
1891     nzcountb = nzcount;
1892     nmask    = 0;
1893     for (j=0; j<bs; j++) {
1894       kmax = rowlengths[i*bs+j];
1895       for (k=0; k<kmax; k++) {
1896         tmp = jj[nzcount++]/bs; /* block col. index */
1897         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1898       }
1899     }
1900     /* sort the masked values */
1901     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1902 
1903     /* set "j" values into matrix */
1904     maskcount = 1;
1905     for (j=0; j<nmask; j++) {
1906       a->j[jcount++]  = masked[j];
1907       mask[masked[j]] = maskcount++;
1908     }
1909 
1910     /* set "a" values into matrix */
1911     ishift = bs2*a->i[i];
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp       = jj[nzcountb]/bs ; /* block col. index */
1916         if (tmp >= i){
1917           block     = mask[tmp] - 1;
1918           point     = jj[nzcountb] - bs*tmp;
1919           idx       = ishift + bs2*block + j + bs*point;
1920           a->a[idx] = aa[nzcountb];
1921         }
1922         nzcountb++;
1923       }
1924     }
1925     /* zero out the mask elements we set */
1926     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1927   }
1928   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1929 
1930   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1931   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1932   ierr = PetscFree(aa);CHKERRQ(ierr);
1933   ierr = PetscFree(jj);CHKERRQ(ierr);
1934   ierr = PetscFree(mask);CHKERRQ(ierr);
1935 
1936   B->assembled = PETSC_TRUE;
1937 #if defined(PETSC_HAVE_SPOOLES)
1938   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
1939   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); }
1940 #endif
1941   ierr = MatView_Private(B);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 EXTERN_C_END
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1948 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1949 {
1950   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1951   MatScalar    *aa=a->a,*v,*v1;
1952   PetscScalar  *x,*b,*t,sum,d;
1953   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1954   int          nz,nz1,*vj,*vj1,i;
1955 
1956   PetscFunctionBegin;
1957   its = its*lits;
1958   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1959 
1960   if (bs > 1)
1961     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1962 
1963   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1964   if (xx != bb) {
1965     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1966   } else {
1967     b = x;
1968   }
1969 
1970   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1971 
1972   if (flag & SOR_ZERO_INITIAL_GUESS) {
1973     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1974       for (i=0; i<m; i++)
1975         t[i] = b[i];
1976 
1977       for (i=0; i<m; i++){
1978         d  = *(aa + ai[i]);  /* diag[i] */
1979         v  = aa + ai[i] + 1;
1980         vj = aj + ai[i] + 1;
1981         nz = ai[i+1] - ai[i] - 1;
1982         x[i] = omega*t[i]/d;
1983         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1984         PetscLogFlops(2*nz-1);
1985       }
1986     }
1987 
1988     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1989       for (i=0; i<m; i++)
1990         t[i] = b[i];
1991 
1992       for (i=0; i<m-1; i++){  /* update rhs */
1993         v  = aa + ai[i] + 1;
1994         vj = aj + ai[i] + 1;
1995         nz = ai[i+1] - ai[i] - 1;
1996         while (nz--) t[*vj++] -= x[i]*(*v++);
1997         PetscLogFlops(2*nz-1);
1998       }
1999       for (i=m-1; i>=0; i--){
2000         d  = *(aa + ai[i]);
2001         v  = aa + ai[i] + 1;
2002         vj = aj + ai[i] + 1;
2003         nz = ai[i+1] - ai[i] - 1;
2004         sum = t[i];
2005         while (nz--) sum -= x[*vj++]*(*v++);
2006         PetscLogFlops(2*nz-1);
2007         x[i] =   (1-omega)*x[i] + omega*sum/d;
2008       }
2009     }
2010     its--;
2011   }
2012 
2013   while (its--) {
2014     /*
2015        forward sweep:
2016        for i=0,...,m-1:
2017          sum[i] = (b[i] - U(i,:)x )/d[i];
2018          x[i]   = (1-omega)x[i] + omega*sum[i];
2019          b      = b - x[i]*U^T(i,:);
2020 
2021     */
2022     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2023       for (i=0; i<m; i++)
2024         t[i] = b[i];
2025 
2026       for (i=0; i<m; i++){
2027         d  = *(aa + ai[i]);  /* diag[i] */
2028         v  = aa + ai[i] + 1; v1=v;
2029         vj = aj + ai[i] + 1; vj1=vj;
2030         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2031         sum = t[i];
2032         while (nz1--) sum -= (*v1++)*x[*vj1++];
2033         x[i] = (1-omega)*x[i] + omega*sum/d;
2034         while (nz--) t[*vj++] -= x[i]*(*v++);
2035         PetscLogFlops(4*nz-2);
2036       }
2037     }
2038 
2039   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2040       /*
2041        backward sweep:
2042        b = b - x[i]*U^T(i,:), i=0,...,n-2
2043        for i=m-1,...,0:
2044          sum[i] = (b[i] - U(i,:)x )/d[i];
2045          x[i]   = (1-omega)x[i] + omega*sum[i];
2046       */
2047       for (i=0; i<m; i++)
2048         t[i] = b[i];
2049 
2050       for (i=0; i<m-1; i++){  /* update rhs */
2051         v  = aa + ai[i] + 1;
2052         vj = aj + ai[i] + 1;
2053         nz = ai[i+1] - ai[i] - 1;
2054         while (nz--) t[*vj++] -= x[i]*(*v++);
2055         PetscLogFlops(2*nz-1);
2056       }
2057       for (i=m-1; i>=0; i--){
2058         d  = *(aa + ai[i]);
2059         v  = aa + ai[i] + 1;
2060         vj = aj + ai[i] + 1;
2061         nz = ai[i+1] - ai[i] - 1;
2062         sum = t[i];
2063         while (nz--) sum -= x[*vj++]*(*v++);
2064         PetscLogFlops(2*nz-1);
2065         x[i] =   (1-omega)*x[i] + omega*sum/d;
2066       }
2067     }
2068   }
2069 
2070   ierr = PetscFree(t); CHKERRQ(ierr);
2071   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2072   if (bb != xx) {
2073     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2074   }
2075 
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 
2080 
2081 
2082 
2083 
2084