xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision cd0d46ebc1dba9f5ba3de7cf2d3eee7f6fd25de7)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128 
129   if (a->inew){
130     ierr = PetscFree(a->inew);CHKERRQ(ierr);
131     a->inew = 0;
132   }
133   ierr = PetscFree(a);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
139 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
140 {
141   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
142 
143   PetscFunctionBegin;
144   switch (op) {
145   case MAT_ROW_ORIENTED:
146     a->roworiented    = PETSC_TRUE;
147     break;
148   case MAT_COLUMN_ORIENTED:
149     a->roworiented    = PETSC_FALSE;
150     break;
151   case MAT_COLUMNS_SORTED:
152     a->sorted         = PETSC_TRUE;
153     break;
154   case MAT_COLUMNS_UNSORTED:
155     a->sorted         = PETSC_FALSE;
156     break;
157   case MAT_KEEP_ZEROED_ROWS:
158     a->keepzeroedrows = PETSC_TRUE;
159     break;
160   case MAT_NO_NEW_NONZERO_LOCATIONS:
161     a->nonew          = 1;
162     break;
163   case MAT_NEW_NONZERO_LOCATION_ERR:
164     a->nonew          = -1;
165     break;
166   case MAT_NEW_NONZERO_ALLOCATION_ERR:
167     a->nonew          = -2;
168     break;
169   case MAT_YES_NEW_NONZERO_LOCATIONS:
170     a->nonew          = 0;
171     break;
172   case MAT_ROWS_SORTED:
173   case MAT_ROWS_UNSORTED:
174   case MAT_YES_NEW_DIAGONALS:
175   case MAT_IGNORE_OFF_PROC_ENTRIES:
176   case MAT_USE_HASH_TABLE:
177     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
178     break;
179   case MAT_NO_NEW_DIAGONALS:
180     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
181   default:
182     SETERRQ(PETSC_ERR_SUP,"unknown option");
183   }
184   PetscFunctionReturn(0);
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
190 {
191   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
192   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
193   MatScalar    *aa,*aa_i;
194   PetscScalar  *v_i;
195 
196   PetscFunctionBegin;
197   bs  = a->bs;
198   ai  = a->i;
199   aj  = a->j;
200   aa  = a->a;
201   bs2 = a->bs2;
202 
203   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
204 
205   bn  = row/bs;   /* Block number */
206   bp  = row % bs; /* Block position */
207   M   = ai[bn+1] - ai[bn];
208   *ncols = bs*M;
209 
210   if (v) {
211     *v = 0;
212     if (*ncols) {
213       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
214       for (i=0; i<M; i++) { /* for each block in the block row */
215         v_i  = *v + i*bs;
216         aa_i = aa + bs2*(ai[bn] + i);
217         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
218       }
219     }
220   }
221 
222   if (cols) {
223     *cols = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         cols_i = *cols + i*bs;
228         itmp  = bs*aj[ai[bn] + i];
229         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
230       }
231     }
232   }
233 
234   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
235   /* this segment is currently removed, so only entries in the upper triangle are obtained */
236 #ifdef column_search
237   v_i    = *v    + M*bs;
238   cols_i = *cols + M*bs;
239   for (i=0; i<bn; i++){ /* for each block row */
240     M = ai[i+1] - ai[i];
241     for (j=0; j<M; j++){
242       itmp = aj[ai[i] + j];    /* block column value */
243       if (itmp == bn){
244         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
245         for (k=0; k<bs; k++) {
246           *cols_i++ = i*bs+k;
247           *v_i++    = aa_i[k];
248         }
249         *ncols += bs;
250         break;
251       }
252     }
253   }
254 #endif
255 
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
262 {
263   int ierr;
264 
265   PetscFunctionBegin;
266   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
267   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
274 {
275   int ierr;
276   PetscFunctionBegin;
277   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
284 {
285   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
286   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
287   PetscScalar  *aa;
288   FILE         *file;
289 
290   PetscFunctionBegin;
291   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
292   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
293   col_lens[0] = MAT_FILE_COOKIE;
294 
295   col_lens[1] = A->m;
296   col_lens[2] = A->m;
297   col_lens[3] = a->s_nz*bs2;
298 
299   /* store lengths of each row and write (including header) to file */
300   count = 0;
301   for (i=0; i<a->mbs; i++) {
302     for (j=0; j<bs; j++) {
303       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
304     }
305   }
306   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
307   ierr = PetscFree(col_lens);CHKERRQ(ierr);
308 
309   /* store column indices (zero start index) */
310   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       for (k=a->i[i]; k<a->i[i+1]; k++) {
315         for (l=0; l<bs; l++) {
316           jj[count++] = bs*a->j[k] + l;
317         }
318       }
319     }
320   }
321   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
322   ierr = PetscFree(jj);CHKERRQ(ierr);
323 
324   /* store nonzero values */
325   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
326   count = 0;
327   for (i=0; i<a->mbs; i++) {
328     for (j=0; j<bs; j++) {
329       for (k=a->i[i]; k<a->i[i+1]; k++) {
330         for (l=0; l<bs; l++) {
331           aa[count++] = a->a[bs2*k + l*bs + j];
332         }
333       }
334     }
335   }
336   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
337   ierr = PetscFree(aa);CHKERRQ(ierr);
338 
339   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
340   if (file) {
341     fprintf(file,"-matload_block_size %d\n",a->bs);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
349 {
350   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
351   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
352   char              *name;
353   PetscViewerFormat format;
354 
355   PetscFunctionBegin;
356   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
357   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
358   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
359     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
360   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
361     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
362   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
364     for (i=0; i<a->mbs; i++) {
365       for (j=0; j<bs; j++) {
366         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
367         for (k=a->i[i]; k<a->i[i+1]; k++) {
368           for (l=0; l<bs; l++) {
369 #if defined(PETSC_USE_COMPLEX)
370             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
371               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
372                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
373             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
374               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
375                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
377               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             }
379 #else
380             if (a->a[bs2*k + l*bs + j] != 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
382             }
383 #endif
384           }
385         }
386         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
387       }
388     }
389     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
390   } else {
391     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
392     for (i=0; i<a->mbs; i++) {
393       for (j=0; j<bs; j++) {
394         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
395         for (k=a->i[i]; k<a->i[i+1]; k++) {
396           for (l=0; l<bs; l++) {
397 #if defined(PETSC_USE_COMPLEX)
398             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
399               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
400                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
401             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
402               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
403                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
404             } else {
405               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             }
407 #else
408             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
409 #endif
410           }
411         }
412         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
413       }
414     }
415     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
416   }
417   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
424 {
425   Mat          A = (Mat) Aa;
426   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
427   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
428   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
429   MatScalar    *aa;
430   MPI_Comm     comm;
431   PetscViewer  viewer;
432 
433   PetscFunctionBegin;
434   /*
435       This is nasty. If this is called from an originally parallel matrix
436    then all processes call this,but only the first has the matrix so the
437    rest should return immediately.
438   */
439   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
440   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
441   if (rank) PetscFunctionReturn(0);
442 
443   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
444 
445   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
446   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
447 
448   /* loop over matrix elements drawing boxes */
449   color = PETSC_DRAW_BLUE;
450   for (i=0,row=0; i<mbs; i++,row+=bs) {
451     for (j=a->i[i]; j<a->i[i+1]; j++) {
452       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
453       x_l = a->j[j]*bs; x_r = x_l + 1.0;
454       aa = a->a + j*bs2;
455       for (k=0; k<bs; k++) {
456         for (l=0; l<bs; l++) {
457           if (PetscRealPart(*aa++) >=  0.) continue;
458           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
459         }
460       }
461     }
462   }
463   color = PETSC_DRAW_CYAN;
464   for (i=0,row=0; i<mbs; i++,row+=bs) {
465     for (j=a->i[i]; j<a->i[i+1]; j++) {
466       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
467       x_l = a->j[j]*bs; x_r = x_l + 1.0;
468       aa = a->a + j*bs2;
469       for (k=0; k<bs; k++) {
470         for (l=0; l<bs; l++) {
471           if (PetscRealPart(*aa++) != 0.) continue;
472           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
473         }
474       }
475     }
476   }
477 
478   color = PETSC_DRAW_RED;
479   for (i=0,row=0; i<mbs; i++,row+=bs) {
480     for (j=a->i[i]; j<a->i[i+1]; j++) {
481       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
482       x_l = a->j[j]*bs; x_r = x_l + 1.0;
483       aa = a->a + j*bs2;
484       for (k=0; k<bs; k++) {
485         for (l=0; l<bs; l++) {
486           if (PetscRealPart(*aa++) <= 0.) continue;
487           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
498 {
499   int          ierr;
500   PetscReal    xl,yl,xr,yr,w,h;
501   PetscDraw    draw;
502   PetscTruth   isnull;
503 
504   PetscFunctionBegin;
505 
506   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
507   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
508 
509   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
510   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
511   xr += w;    yr += h;  xl = -w;     yl = -h;
512   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
513   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatView_SeqSBAIJ"
520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
521 {
522   int        ierr;
523   PetscTruth issocket,isascii,isbinary,isdraw;
524 
525   PetscFunctionBegin;
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
530   if (issocket) {
531     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
532   } else if (isascii){
533     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
534   } else if (isbinary) {
535     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
536   } else if (isdraw) {
537     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
538   } else {
539     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
548 {
549   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
550   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
551   int        *ai = a->i,*ailen = a->ilen;
552   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
553   MatScalar  *ap,*aa = a->a,zero = 0.0;
554 
555   PetscFunctionBegin;
556   for (k=0; k<m; k++) { /* loop over rows */
557     row  = im[k]; brow = row/bs;
558     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
559     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
560     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
561     nrow = ailen[brow];
562     for (l=0; l<n; l++) { /* loop over columns */
563       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
564       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
565       col  = in[l] ;
566       bcol = col/bs;
567       cidx = col%bs;
568       ridx = row%bs;
569       high = nrow;
570       low  = 0; /* assume unsorted */
571       while (high-low > 5) {
572         t = (low+high)/2;
573         if (rp[t] > bcol) high = t;
574         else             low  = t;
575       }
576       for (i=low; i<high; i++) {
577         if (rp[i] > bcol) break;
578         if (rp[i] == bcol) {
579           *v++ = ap[bs2*i+bs*cidx+ridx];
580           goto finished;
581         }
582       }
583       *v++ = zero;
584       finished:;
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
594 {
595   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
596   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
597   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
598   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
599   PetscTruth  roworiented=a->roworiented;
600   MatScalar   *value = v,*ap,*aa = a->a,*bap;
601 
602   PetscFunctionBegin;
603   if (roworiented) {
604     stepval = (n-1)*bs;
605   } else {
606     stepval = (m-1)*bs;
607   }
608   for (k=0; k<m; k++) { /* loop over added rows */
609     row  = im[k];
610     if (row < 0) continue;
611 #if defined(PETSC_USE_BOPT_g)
612     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
613 #endif
614     rp   = aj + ai[row];
615     ap   = aa + bs2*ai[row];
616     rmax = imax[row];
617     nrow = ailen[row];
618     low  = 0;
619     for (l=0; l<n; l++) { /* loop over added columns */
620       if (in[l] < 0) continue;
621 #if defined(PETSC_USE_BOPT_g)
622       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
623 #endif
624       col = in[l];
625       if (col < row) continue; /* ignore lower triangular block */
626       if (roworiented) {
627         value = v + k*(stepval+bs)*bs + l*bs;
628       } else {
629         value = v + l*(stepval+bs)*bs + k*bs;
630       }
631       if (!sorted) low = 0; high = nrow;
632       while (high-low > 7) {
633         t = (low+high)/2;
634         if (rp[t] > col) high = t;
635         else             low  = t;
636       }
637       for (i=low; i<high; i++) {
638         if (rp[i] > col) break;
639         if (rp[i] == col) {
640           bap  = ap +  bs2*i;
641           if (roworiented) {
642             if (is == ADD_VALUES) {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] += *value++;
646                 }
647               }
648             } else {
649               for (ii=0; ii<bs; ii++,value+=stepval) {
650                 for (jj=ii; jj<bs2; jj+=bs) {
651                   bap[jj] = *value++;
652                 }
653               }
654             }
655           } else {
656             if (is == ADD_VALUES) {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++ += *value++;
660                 }
661               }
662             } else {
663               for (ii=0; ii<bs; ii++,value+=stepval) {
664                 for (jj=0; jj<bs; jj++) {
665                   *bap++  = *value++;
666                 }
667               }
668             }
669           }
670           goto noinsert2;
671         }
672       }
673       if (nonew == 1) goto noinsert2;
674       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
675       if (nrow >= rmax) {
676         /* there is no extra room in row, therefore enlarge */
677         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
678         MatScalar *new_a;
679 
680         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
681 
682         /* malloc new storage space */
683         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
684 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
685         new_j   = (int*)(new_a + bs2*new_nz);
686         new_i   = new_j + new_nz;
687 
688         /* copy over old data into new slots */
689         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
690         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
691         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
692         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
693         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
694         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
695         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
697         /* free up old matrix storage */
698         ierr = PetscFree(a->a);CHKERRQ(ierr);
699         if (!a->singlemalloc) {
700           ierr = PetscFree(a->i);CHKERRQ(ierr);
701           ierr = PetscFree(a->j);CHKERRQ(ierr);
702         }
703         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
704         a->singlemalloc = PETSC_TRUE;
705 
706         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
707         rmax = imax[row] = imax[row] + CHUNKSIZE;
708         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
709         a->s_maxnz += bs2*CHUNKSIZE;
710         a->reallocs++;
711         a->s_nz++;
712       }
713       N = nrow++ - 1;
714       /* shift up all the later entries in this row */
715       for (ii=N; ii>=i; ii--) {
716         rp[ii+1] = rp[ii];
717         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719       if (N >= i) {
720         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
721       }
722       rp[i] = col;
723       bap   = ap +  bs2*i;
724       if (roworiented) {
725         for (ii=0; ii<bs; ii++,value+=stepval) {
726           for (jj=ii; jj<bs2; jj+=bs) {
727             bap[jj] = *value++;
728           }
729         }
730       } else {
731         for (ii=0; ii<bs; ii++,value+=stepval) {
732           for (jj=0; jj<bs; jj++) {
733             *bap++  = *value++;
734           }
735         }
736       }
737       noinsert2:;
738       low = i;
739     }
740     ailen[row] = nrow;
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
748 {
749   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
750   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
751   int        m = A->m,*ip,N,*ailen = a->ilen;
752   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
753   MatScalar  *aa = a->a,*ap;
754 #if defined(PETSC_HAVE_SPOOLES)
755   PetscTruth   flag;
756 #endif
757 
758   PetscFunctionBegin;
759   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
760 
761   if (m) rmax = ailen[0];
762   for (i=1; i<mbs; i++) {
763     /* move each row back by the amount of empty slots (fshift) before it*/
764     fshift += imax[i-1] - ailen[i-1];
765     rmax   = PetscMax(rmax,ailen[i]);
766     if (fshift) {
767       ip = aj + ai[i]; ap = aa + bs2*ai[i];
768       N = ailen[i];
769       for (j=0; j<N; j++) {
770         ip[j-fshift] = ip[j];
771         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
772       }
773     }
774     ai[i] = ai[i-1] + ailen[i-1];
775   }
776   if (mbs) {
777     fshift += imax[mbs-1] - ailen[mbs-1];
778     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
779   }
780   /* reset ilen and imax for each row */
781   for (i=0; i<mbs; i++) {
782     ailen[i] = imax[i] = ai[i+1] - ai[i];
783   }
784   a->s_nz = ai[mbs];
785 
786   /* diagonals may have moved, reset it */
787   if (a->diag) {
788     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
789   }
790   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
791            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
793            a->reallocs);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
795   a->reallocs          = 0;
796   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
797 
798 #if defined(PETSC_HAVE_SPOOLES)
799   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
800   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
801 #endif
802 
803   PetscFunctionReturn(0);
804 }
805 
806 /*
807    This function returns an array of flags which indicate the locations of contiguous
808    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
809    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
810    Assume: sizes should be long enough to hold all the values.
811 */
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
815 {
816   int        i,j,k,row;
817   PetscTruth flg;
818 
819   PetscFunctionBegin;
820   for (i=0,j=0; i<n; j++) {
821     row = idx[i];
822     if (row%bs!=0) { /* Not the begining of a block */
823       sizes[j] = 1;
824       i++;
825     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
826       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
827       i++;
828     } else { /* Begining of the block, so check if the complete block exists */
829       flg = PETSC_TRUE;
830       for (k=1; k<bs; k++) {
831         if (row+k != idx[i+k]) { /* break in the block */
832           flg = PETSC_FALSE;
833           break;
834         }
835       }
836       if (flg == PETSC_TRUE) { /* No break in the bs */
837         sizes[j] = bs;
838         i+= bs;
839       } else {
840         sizes[j] = 1;
841         i++;
842       }
843     }
844   }
845   *bs_max = j;
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
852 {
853   PetscFunctionBegin;
854   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
855 }
856 
857 /* Only add/insert a(i,j) with i<=j (blocks).
858    Any a(i,j) with i>j input by user is ingored.
859 */
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
864 {
865   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
866   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
867   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
868   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
869   int         ridx,cidx,bs2=a->bs2,ierr;
870   MatScalar   *ap,value,*aa=a->a,*bap;
871 
872   PetscFunctionBegin;
873 
874   for (k=0; k<m; k++) { /* loop over added rows */
875     row  = im[k];       /* row number */
876     brow = row/bs;      /* block row number */
877     if (row < 0) continue;
878 #if defined(PETSC_USE_BOPT_g)
879     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
880 #endif
881     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
882     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
883     rmax = imax[brow];  /* maximum space allocated for this row */
884     nrow = ailen[brow]; /* actual length of this row */
885     low  = 0;
886 
887     for (l=0; l<n; l++) { /* loop over added columns */
888       if (in[l] < 0) continue;
889 #if defined(PETSC_USE_BOPT_g)
890       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
891 #endif
892       col = in[l];
893       bcol = col/bs;              /* block col number */
894 
895       if (brow <= bcol){
896         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
897         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
898           /* element value a(k,l) */
899           if (roworiented) {
900             value = v[l + k*n];
901           } else {
902             value = v[k + l*m];
903           }
904 
905           /* move pointer bap to a(k,l) quickly and add/insert value */
906           if (!sorted) low = 0; high = nrow;
907           while (high-low > 7) {
908             t = (low+high)/2;
909             if (rp[t] > bcol) high = t;
910             else              low  = t;
911           }
912           for (i=low; i<high; i++) {
913             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
914             if (rp[i] > bcol) break;
915             if (rp[i] == bcol) {
916               bap  = ap +  bs2*i + bs*cidx + ridx;
917               if (is == ADD_VALUES) *bap += value;
918               else                  *bap  = value;
919               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
920               if (brow == bcol && ridx < cidx){
921                 bap  = ap +  bs2*i + bs*ridx + cidx;
922                 if (is == ADD_VALUES) *bap += value;
923                 else                  *bap  = value;
924               }
925               goto noinsert1;
926             }
927           }
928 
929       if (nonew == 1) goto noinsert1;
930       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931       if (nrow >= rmax) {
932         /* there is no extra room in row, therefore enlarge */
933         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
934         MatScalar *new_a;
935 
936         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
937 
938         /* Malloc new storage space */
939         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
940         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
941         new_j = (int*)(new_a + bs2*new_nz);
942         new_i = new_j + new_nz;
943 
944         /* copy over old data into new slots */
945         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
946         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
947         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
948         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
949         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
950         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
951         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
952         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
953         /* free up old matrix storage */
954         ierr = PetscFree(a->a);CHKERRQ(ierr);
955         if (!a->singlemalloc) {
956           ierr = PetscFree(a->i);CHKERRQ(ierr);
957           ierr = PetscFree(a->j);CHKERRQ(ierr);
958         }
959         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
960         a->singlemalloc = PETSC_TRUE;
961 
962         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
963         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
964         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
965         a->s_maxnz += bs2*CHUNKSIZE;
966         a->reallocs++;
967         a->s_nz++;
968       }
969 
970       N = nrow++ - 1;
971       /* shift up all the later entries in this row */
972       for (ii=N; ii>=i; ii--) {
973         rp[ii+1] = rp[ii];
974         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
975       }
976       if (N>=i) {
977         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
978       }
979       rp[i]                      = bcol;
980       ap[bs2*i + bs*cidx + ridx] = value;
981       noinsert1:;
982       low = i;
983       /* } */
984         }
985       } /* end of if .. if..  */
986     }                     /* end of loop over added columns */
987     ailen[brow] = nrow;
988   }                       /* end of loop over added rows */
989 
990   PetscFunctionReturn(0);
991 }
992 
993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1006 extern int MatZeroEntries_SeqSBAIJ(Mat);
1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1008 
1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1024 
1025 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1026 
1027 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1035 
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1044 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1045 
1046 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1054 
1055 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1063 
1064 #ifdef HAVE_MatICCFactor
1065 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1068 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1069 {
1070   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1071   Mat         outA;
1072   int         ierr;
1073   PetscTruth  row_identity,col_identity;
1074 
1075   PetscFunctionBegin;
1076   /*
1077   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1078   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1079   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1080   if (!row_identity || !col_identity) {
1081     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1082   }
1083   */
1084 
1085   outA          = inA;
1086   inA->factor   = FACTOR_CHOLESKY;
1087 
1088   if (!a->diag) {
1089     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1090   }
1091   /*
1092       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1093       for ILU(0) factorization with natural ordering
1094   */
1095   switch (a->bs) {
1096   case 1:
1097     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1098     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1099     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1100     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1101   case 2:
1102     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1103     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1104     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1105     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1106     break;
1107   case 3:
1108     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1109     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1110     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1111     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1112     break;
1113   case 4:
1114     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1115     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1116     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1117     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1118     break;
1119   case 5:
1120     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1121     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1122     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1123     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1124     break;
1125   case 6:
1126     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1127     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1128     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1129     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1130     break;
1131   case 7:
1132     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1133     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1134     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1135     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1136     break;
1137   default:
1138     a->row        = row;
1139     a->icol       = col;
1140     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1141     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1142 
1143     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1144     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1145     PetscLogObjectParent(inA,a->icol);
1146 
1147     if (!a->solve_work) {
1148       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1149       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1150     }
1151   }
1152 
1153   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1154 
1155   PetscFunctionReturn(0);
1156 }
1157 #endif
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1161 int MatPrintHelp_SeqSBAIJ(Mat A)
1162 {
1163   static PetscTruth called = PETSC_FALSE;
1164   MPI_Comm          comm = A->comm;
1165   int               ierr;
1166 
1167   PetscFunctionBegin;
1168   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1169   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1170   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 EXTERN_C_BEGIN
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1177 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1178 {
1179   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1180   int         i,nz,n;
1181 
1182   PetscFunctionBegin;
1183   nz = baij->s_maxnz;
1184   n  = mat->n;
1185   for (i=0; i<nz; i++) {
1186     baij->j[i] = indices[i];
1187   }
1188   baij->s_nz = nz;
1189   for (i=0; i<n; i++) {
1190     baij->ilen[i] = baij->imax[i];
1191   }
1192 
1193   PetscFunctionReturn(0);
1194 }
1195 EXTERN_C_END
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1199 /*@
1200     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1201        in the matrix.
1202 
1203   Input Parameters:
1204 +  mat     - the SeqSBAIJ matrix
1205 -  indices - the column indices
1206 
1207   Level: advanced
1208 
1209   Notes:
1210     This can be called if you have precomputed the nonzero structure of the
1211   matrix and want to provide it to the matrix object to improve the performance
1212   of the MatSetValues() operation.
1213 
1214     You MUST have set the correct numbers of nonzeros per row in the call to
1215   MatCreateSeqSBAIJ().
1216 
1217     MUST be called before any calls to MatSetValues()
1218 
1219 .seealso: MatCreateSeqSBAIJ
1220 @*/
1221 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1222 {
1223   int ierr,(*f)(Mat,int *);
1224 
1225   PetscFunctionBegin;
1226   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1227   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1228   if (f) {
1229     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1230   } else {
1231     SETERRQ(1,"Wrong type of matrix to set column indices");
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1238 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1239 {
1240   int        ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1249 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array)
1250 {
1251   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1252   PetscFunctionBegin;
1253   *array = a->a;
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1259 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array)
1260 {
1261   PetscFunctionBegin;
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #include "petscblaslapack.h"
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1268 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1269 {
1270   int          ierr,one=1,i;
1271   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1272   int          *xtoy,nz,row,xcol,ycol,jx,jy,*xi=x->i,*xj=x->j,*yi=y->i,*yj=y->j;
1273 
1274   PetscFunctionBegin;
1275   if (str == SAME_NONZERO_PATTERN) {
1276     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1277   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1278     if (x->bs > 1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not support for bs>1");
1279     ierr = PetscMalloc(x->s_nz*sizeof(int),&xtoy);CHKERRQ(ierr);
1280     i = 0;
1281     for (row=0; row<x->mbs; row++){
1282       nz = xi[1] - xi[0];
1283       jy = 0;
1284       for (jx=0; jx<nz; jx++,jy++){
1285         xcol = xj[*xi + jx];
1286         ycol = yj[*yi + jy];  /* col index for y */
1287         while ( ycol < xcol ) {
1288           jy++;
1289           ycol = yj[*yi + jy];
1290         }
1291         if (xcol != ycol) SETERRQ2(PETSC_ERR_ARG_WRONG,"X matrix entry (%d,%d) is not in Y matrix",row,ycol);
1292         xtoy[i++] = *yi + jy;
1293       }
1294       xi++; yi++;
1295     }
1296 
1297     for (i=0; i<x->s_nz; i++) {
1298       y->a[xtoy[i]] += (*a)*(x->a[i]);
1299     }
1300     ierr = PetscFree(xtoy);CHKERRQ(ierr);
1301   } else {
1302     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1303   }
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 /* -------------------------------------------------------------------*/
1308 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1309        MatGetRow_SeqSBAIJ,
1310        MatRestoreRow_SeqSBAIJ,
1311        MatMult_SeqSBAIJ_N,
1312        MatMultAdd_SeqSBAIJ_N,
1313        MatMultTranspose_SeqSBAIJ,
1314        MatMultTransposeAdd_SeqSBAIJ,
1315        MatSolve_SeqSBAIJ_N,
1316        0,
1317        0,
1318        0,
1319        0,
1320        MatCholeskyFactor_SeqSBAIJ,
1321        MatRelax_SeqSBAIJ,
1322        MatTranspose_SeqSBAIJ,
1323        MatGetInfo_SeqSBAIJ,
1324        MatEqual_SeqSBAIJ,
1325        MatGetDiagonal_SeqSBAIJ,
1326        MatDiagonalScale_SeqSBAIJ,
1327        MatNorm_SeqSBAIJ,
1328        0,
1329        MatAssemblyEnd_SeqSBAIJ,
1330        0,
1331        MatSetOption_SeqSBAIJ,
1332        MatZeroEntries_SeqSBAIJ,
1333        MatZeroRows_SeqSBAIJ,
1334        0,
1335        0,
1336        MatCholeskyFactorSymbolic_SeqSBAIJ,
1337        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1338        MatSetUpPreallocation_SeqSBAIJ,
1339        0,
1340        MatICCFactorSymbolic_SeqSBAIJ,
1341        MatGetArray_SeqSBAIJ,
1342        MatRestoreArray_SeqSBAIJ,
1343        MatDuplicate_SeqSBAIJ,
1344        0,
1345        0,
1346        0,
1347        0,
1348        MatAXPY_SeqSBAIJ,
1349        MatGetSubMatrices_SeqSBAIJ,
1350        MatIncreaseOverlap_SeqSBAIJ,
1351        MatGetValues_SeqSBAIJ,
1352        0,
1353        MatPrintHelp_SeqSBAIJ,
1354        MatScale_SeqSBAIJ,
1355        0,
1356        0,
1357        0,
1358        MatGetBlockSize_SeqSBAIJ,
1359        MatGetRowIJ_SeqSBAIJ,
1360        MatRestoreRowIJ_SeqSBAIJ,
1361        0,
1362        0,
1363        0,
1364        0,
1365        0,
1366        0,
1367        MatSetValuesBlocked_SeqSBAIJ,
1368        MatGetSubMatrix_SeqSBAIJ,
1369        0,
1370        0,
1371        MatGetPetscMaps_Petsc,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        MatGetRowMax_SeqSBAIJ,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        0,
1391        0,
1392 #if !defined(PETSC_USE_COMPLEX)
1393        MatGetInertia_SeqSBAIJ
1394 #else
1395        0
1396 #endif
1397 };
1398 
1399 EXTERN_C_BEGIN
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1402 int MatStoreValues_SeqSBAIJ(Mat mat)
1403 {
1404   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1405   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1406   int         ierr;
1407 
1408   PetscFunctionBegin;
1409   if (aij->nonew != 1) {
1410     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1411   }
1412 
1413   /* allocate space for values if not already there */
1414   if (!aij->saved_values) {
1415     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1416   }
1417 
1418   /* copy values over */
1419   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 EXTERN_C_END
1423 
1424 EXTERN_C_BEGIN
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1427 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1428 {
1429   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1430   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1431 
1432   PetscFunctionBegin;
1433   if (aij->nonew != 1) {
1434     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1435   }
1436   if (!aij->saved_values) {
1437     SETERRQ(1,"Must call MatStoreValues(A);first");
1438   }
1439 
1440   /* copy values over */
1441   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 EXTERN_C_END
1445 
1446 EXTERN_C_BEGIN
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1449 int MatCreate_SeqSBAIJ(Mat B)
1450 {
1451   Mat_SeqSBAIJ *b;
1452   int          ierr,size;
1453 
1454   PetscFunctionBegin;
1455   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1456   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1457   B->m = B->M = PetscMax(B->m,B->M);
1458   B->n = B->N = PetscMax(B->n,B->N);
1459 
1460   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1461   B->data = (void*)b;
1462   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1463   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1464   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1465   B->ops->view        = MatView_SeqSBAIJ;
1466   B->factor           = 0;
1467   B->lupivotthreshold = 1.0;
1468   B->mapping          = 0;
1469   b->row              = 0;
1470   b->icol             = 0;
1471   b->reallocs         = 0;
1472   b->saved_values     = 0;
1473 
1474   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1475   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1476 
1477   b->sorted           = PETSC_FALSE;
1478   b->roworiented      = PETSC_TRUE;
1479   b->nonew            = 0;
1480   b->diag             = 0;
1481   b->solve_work       = 0;
1482   b->mult_work        = 0;
1483   B->spptr            = 0;
1484   b->keepzeroedrows   = PETSC_FALSE;
1485 
1486   b->inew             = 0;
1487   b->jnew             = 0;
1488   b->anew             = 0;
1489   b->a2anew           = 0;
1490   b->permute          = PETSC_FALSE;
1491 
1492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1493                                      "MatStoreValues_SeqSBAIJ",
1494                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1496                                      "MatRetrieveValues_SeqSBAIJ",
1497                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1499                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1500                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 EXTERN_C_END
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1507 /*@C
1508    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1509    compressed row) format.  For good matrix assembly performance the
1510    user should preallocate the matrix storage by setting the parameter nz
1511    (or the array nnz).  By setting these parameters accurately, performance
1512    during matrix assembly can be increased by more than a factor of 50.
1513 
1514    Collective on Mat
1515 
1516    Input Parameters:
1517 +  A - the symmetric matrix
1518 .  bs - size of block
1519 .  nz - number of block nonzeros per block row (same for all rows)
1520 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1521          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1522 
1523    Options Database Keys:
1524 .   -mat_no_unroll - uses code that does not unroll the loops in the
1525                      block calculations (much slower)
1526 .    -mat_block_size - size of the blocks to use
1527 
1528    Level: intermediate
1529 
1530    Notes:
1531    Specify the preallocated storage with either nz or nnz (not both).
1532    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1533    allocation.  For additional details, see the users manual chapter on
1534    matrices.
1535 
1536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1537 @*/
1538 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1539 {
1540   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1541   int          i,len,ierr,mbs,bs2;
1542   PetscTruth   flg;
1543   int          s_nz;
1544 
1545   PetscFunctionBegin;
1546   B->preallocated = PETSC_TRUE;
1547   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1548   mbs  = B->m/bs;
1549   bs2  = bs*bs;
1550 
1551   if (mbs*bs != B->m) {
1552     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1553   }
1554 
1555   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1556   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1557   if (nnz) {
1558     for (i=0; i<mbs; i++) {
1559       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1560       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1561     }
1562   }
1563 
1564   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1565   if (!flg) {
1566     switch (bs) {
1567     case 1:
1568       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1569       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1570       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1571       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1572       B->ops->mult            = MatMult_SeqSBAIJ_1;
1573       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1574       break;
1575     case 2:
1576       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1577       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1578       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1579       B->ops->mult            = MatMult_SeqSBAIJ_2;
1580       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1581       break;
1582     case 3:
1583       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1584       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1585       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1586       B->ops->mult            = MatMult_SeqSBAIJ_3;
1587       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1588       break;
1589     case 4:
1590       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1591       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1592       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1593       B->ops->mult            = MatMult_SeqSBAIJ_4;
1594       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1595       break;
1596     case 5:
1597       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1598       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1599       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1600       B->ops->mult            = MatMult_SeqSBAIJ_5;
1601       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1602       break;
1603     case 6:
1604       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1605       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1606       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1607       B->ops->mult            = MatMult_SeqSBAIJ_6;
1608       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1609       break;
1610     case 7:
1611       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1612       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1613       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1614       B->ops->mult            = MatMult_SeqSBAIJ_7;
1615       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1616       break;
1617     }
1618   }
1619 
1620   b->mbs = mbs;
1621   b->nbs = mbs;
1622   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1623   if (!nnz) {
1624     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1625     else if (nz <= 0)        nz = 1;
1626     for (i=0; i<mbs; i++) {
1627       b->imax[i] = nz;
1628     }
1629     nz = nz*mbs; /* total nz */
1630   } else {
1631     nz = 0;
1632     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1633   }
1634   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1635   s_nz = nz;
1636 
1637   /* allocate the matrix space */
1638   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1639   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1640   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1641   b->j = (int*)(b->a + s_nz*bs2);
1642   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1643   b->i = b->j + s_nz;
1644   b->singlemalloc = PETSC_TRUE;
1645 
1646   /* pointer to beginning of each row */
1647   b->i[0] = 0;
1648   for (i=1; i<mbs+1; i++) {
1649     b->i[i] = b->i[i-1] + b->imax[i-1];
1650   }
1651 
1652   /* b->ilen will count nonzeros in each block row so far. */
1653   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1654   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1655   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1656 
1657   b->bs               = bs;
1658   b->bs2              = bs2;
1659   b->s_nz             = 0;
1660   b->s_maxnz          = s_nz*bs2;
1661 
1662   b->inew             = 0;
1663   b->jnew             = 0;
1664   b->anew             = 0;
1665   b->a2anew           = 0;
1666   b->permute          = PETSC_FALSE;
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatCreateSeqSBAIJ"
1673 /*@C
1674    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1675    compressed row) format.  For good matrix assembly performance the
1676    user should preallocate the matrix storage by setting the parameter nz
1677    (or the array nnz).  By setting these parameters accurately, performance
1678    during matrix assembly can be increased by more than a factor of 50.
1679 
1680    Collective on MPI_Comm
1681 
1682    Input Parameters:
1683 +  comm - MPI communicator, set to PETSC_COMM_SELF
1684 .  bs - size of block
1685 .  m - number of rows, or number of columns
1686 .  nz - number of block nonzeros per block row (same for all rows)
1687 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1688          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1689 
1690    Output Parameter:
1691 .  A - the symmetric matrix
1692 
1693    Options Database Keys:
1694 .   -mat_no_unroll - uses code that does not unroll the loops in the
1695                      block calculations (much slower)
1696 .    -mat_block_size - size of the blocks to use
1697 
1698    Level: intermediate
1699 
1700    Notes:
1701 
1702    Specify the preallocated storage with either nz or nnz (not both).
1703    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1704    allocation.  For additional details, see the users manual chapter on
1705    matrices.
1706 
1707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1708 @*/
1709 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1710 {
1711   int ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1715   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1716   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1722 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1723 {
1724   Mat         C;
1725   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1726   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1727 
1728   PetscFunctionBegin;
1729   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1730 
1731   *B = 0;
1732   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1733   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1734   c    = (Mat_SeqSBAIJ*)C->data;
1735 
1736   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1737   C->preallocated   = PETSC_TRUE;
1738   C->factor         = A->factor;
1739   c->row            = 0;
1740   c->icol           = 0;
1741   c->saved_values   = 0;
1742   c->keepzeroedrows = a->keepzeroedrows;
1743   C->assembled      = PETSC_TRUE;
1744 
1745   c->bs         = a->bs;
1746   c->bs2        = a->bs2;
1747   c->mbs        = a->mbs;
1748   c->nbs        = a->nbs;
1749 
1750   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1751   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1752   for (i=0; i<mbs; i++) {
1753     c->imax[i] = a->imax[i];
1754     c->ilen[i] = a->ilen[i];
1755   }
1756 
1757   /* allocate the matrix space */
1758   c->singlemalloc = PETSC_TRUE;
1759   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1760   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1761   c->j = (int*)(c->a + nz*bs2);
1762   c->i = c->j + nz;
1763   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1764   if (mbs > 0) {
1765     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1766     if (cpvalues == MAT_COPY_VALUES) {
1767       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1768     } else {
1769       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1770     }
1771   }
1772 
1773   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1774   c->sorted      = a->sorted;
1775   c->roworiented = a->roworiented;
1776   c->nonew       = a->nonew;
1777 
1778   if (a->diag) {
1779     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1780     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1781     for (i=0; i<mbs; i++) {
1782       c->diag[i] = a->diag[i];
1783     }
1784   } else c->diag        = 0;
1785   c->s_nz               = a->s_nz;
1786   c->s_maxnz            = a->s_maxnz;
1787   c->solve_work         = 0;
1788   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1789   c->mult_work          = 0;
1790   *B = C;
1791   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 EXTERN_C_BEGIN
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1798 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1799 {
1800   Mat_SeqSBAIJ *a;
1801   Mat          B;
1802   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1803   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1804   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1805   int          *masked,nmask,tmp,bs2,ishift;
1806   PetscScalar  *aa;
1807   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1808 #if defined(PETSC_HAVE_SPOOLES)
1809   PetscTruth   flag;
1810 #endif
1811 
1812   PetscFunctionBegin;
1813   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1814   bs2  = bs*bs;
1815 
1816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1817   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1818   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1819   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1820   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1821   M = header[1]; N = header[2]; nz = header[3];
1822 
1823   if (header[3] < 0) {
1824     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1825   }
1826 
1827   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1828 
1829   /*
1830      This code adds extra rows to make sure the number of rows is
1831     divisible by the blocksize
1832   */
1833   mbs        = M/bs;
1834   extra_rows = bs - M + bs*(mbs);
1835   if (extra_rows == bs) extra_rows = 0;
1836   else                  mbs++;
1837   if (extra_rows) {
1838     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1839   }
1840 
1841   /* read in row lengths */
1842   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1843   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1844   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1845 
1846   /* read in column indices */
1847   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1848   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1849   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1850 
1851   /* loop over row lengths determining block row lengths */
1852   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1853   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1854   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1855   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1856   masked   = mask + mbs;
1857   rowcount = 0; nzcount = 0;
1858   for (i=0; i<mbs; i++) {
1859     nmask = 0;
1860     for (j=0; j<bs; j++) {
1861       kmax = rowlengths[rowcount];
1862       for (k=0; k<kmax; k++) {
1863         tmp = jj[nzcount++]/bs;   /* block col. index */
1864         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1865       }
1866       rowcount++;
1867     }
1868     s_browlengths[i] += nmask;
1869 
1870     /* zero out the mask elements we set */
1871     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1872   }
1873 
1874   /* create our matrix */
1875   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1876   B = *A;
1877   a = (Mat_SeqSBAIJ*)B->data;
1878 
1879   /* set matrix "i" values */
1880   a->i[0] = 0;
1881   for (i=1; i<= mbs; i++) {
1882     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1883     a->ilen[i-1] = s_browlengths[i-1];
1884   }
1885   a->s_nz = a->i[mbs];
1886 
1887   /* read in nonzero values */
1888   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1889   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1890   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1891 
1892   /* set "a" and "j" values into matrix */
1893   nzcount = 0; jcount = 0;
1894   for (i=0; i<mbs; i++) {
1895     nzcountb = nzcount;
1896     nmask    = 0;
1897     for (j=0; j<bs; j++) {
1898       kmax = rowlengths[i*bs+j];
1899       for (k=0; k<kmax; k++) {
1900         tmp = jj[nzcount++]/bs; /* block col. index */
1901         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1902       }
1903     }
1904     /* sort the masked values */
1905     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1906 
1907     /* set "j" values into matrix */
1908     maskcount = 1;
1909     for (j=0; j<nmask; j++) {
1910       a->j[jcount++]  = masked[j];
1911       mask[masked[j]] = maskcount++;
1912     }
1913 
1914     /* set "a" values into matrix */
1915     ishift = bs2*a->i[i];
1916     for (j=0; j<bs; j++) {
1917       kmax = rowlengths[i*bs+j];
1918       for (k=0; k<kmax; k++) {
1919         tmp       = jj[nzcountb]/bs ; /* block col. index */
1920         if (tmp >= i){
1921           block     = mask[tmp] - 1;
1922           point     = jj[nzcountb] - bs*tmp;
1923           idx       = ishift + bs2*block + j + bs*point;
1924           a->a[idx] = aa[nzcountb];
1925         }
1926         nzcountb++;
1927       }
1928     }
1929     /* zero out the mask elements we set */
1930     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1931   }
1932   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1933 
1934   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1935   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1936   ierr = PetscFree(aa);CHKERRQ(ierr);
1937   ierr = PetscFree(jj);CHKERRQ(ierr);
1938   ierr = PetscFree(mask);CHKERRQ(ierr);
1939 
1940   B->assembled = PETSC_TRUE;
1941 #if defined(PETSC_HAVE_SPOOLES)
1942   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
1943   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); }
1944 #endif
1945   ierr = MatView_Private(B);CHKERRQ(ierr);
1946   PetscFunctionReturn(0);
1947 }
1948 EXTERN_C_END
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1952 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1953 {
1954   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1955   MatScalar    *aa=a->a,*v,*v1;
1956   PetscScalar  *x,*b,*t,sum,d;
1957   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1958   int          nz,nz1,*vj,*vj1,i;
1959 
1960   PetscFunctionBegin;
1961   its = its*lits;
1962   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1963 
1964   if (bs > 1)
1965     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1966 
1967   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1968   if (xx != bb) {
1969     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1970   } else {
1971     b = x;
1972   }
1973 
1974   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1975 
1976   if (flag & SOR_ZERO_INITIAL_GUESS) {
1977     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1978       for (i=0; i<m; i++)
1979         t[i] = b[i];
1980 
1981       for (i=0; i<m; i++){
1982         d  = *(aa + ai[i]);  /* diag[i] */
1983         v  = aa + ai[i] + 1;
1984         vj = aj + ai[i] + 1;
1985         nz = ai[i+1] - ai[i] - 1;
1986         x[i] = omega*t[i]/d;
1987         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1988         PetscLogFlops(2*nz-1);
1989       }
1990     }
1991 
1992     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1993       for (i=0; i<m; i++)
1994         t[i] = b[i];
1995 
1996       for (i=0; i<m-1; i++){  /* update rhs */
1997         v  = aa + ai[i] + 1;
1998         vj = aj + ai[i] + 1;
1999         nz = ai[i+1] - ai[i] - 1;
2000         while (nz--) t[*vj++] -= x[i]*(*v++);
2001         PetscLogFlops(2*nz-1);
2002       }
2003       for (i=m-1; i>=0; i--){
2004         d  = *(aa + ai[i]);
2005         v  = aa + ai[i] + 1;
2006         vj = aj + ai[i] + 1;
2007         nz = ai[i+1] - ai[i] - 1;
2008         sum = t[i];
2009         while (nz--) sum -= x[*vj++]*(*v++);
2010         PetscLogFlops(2*nz-1);
2011         x[i] =   (1-omega)*x[i] + omega*sum/d;
2012       }
2013     }
2014     its--;
2015   }
2016 
2017   while (its--) {
2018     /*
2019        forward sweep:
2020        for i=0,...,m-1:
2021          sum[i] = (b[i] - U(i,:)x )/d[i];
2022          x[i]   = (1-omega)x[i] + omega*sum[i];
2023          b      = b - x[i]*U^T(i,:);
2024 
2025     */
2026     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2027       for (i=0; i<m; i++)
2028         t[i] = b[i];
2029 
2030       for (i=0; i<m; i++){
2031         d  = *(aa + ai[i]);  /* diag[i] */
2032         v  = aa + ai[i] + 1; v1=v;
2033         vj = aj + ai[i] + 1; vj1=vj;
2034         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2035         sum = t[i];
2036         while (nz1--) sum -= (*v1++)*x[*vj1++];
2037         x[i] = (1-omega)*x[i] + omega*sum/d;
2038         while (nz--) t[*vj++] -= x[i]*(*v++);
2039         PetscLogFlops(4*nz-2);
2040       }
2041     }
2042 
2043   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2044       /*
2045        backward sweep:
2046        b = b - x[i]*U^T(i,:), i=0,...,n-2
2047        for i=m-1,...,0:
2048          sum[i] = (b[i] - U(i,:)x )/d[i];
2049          x[i]   = (1-omega)x[i] + omega*sum[i];
2050       */
2051       for (i=0; i<m; i++)
2052         t[i] = b[i];
2053 
2054       for (i=0; i<m-1; i++){  /* update rhs */
2055         v  = aa + ai[i] + 1;
2056         vj = aj + ai[i] + 1;
2057         nz = ai[i+1] - ai[i] - 1;
2058         while (nz--) t[*vj++] -= x[i]*(*v++);
2059         PetscLogFlops(2*nz-1);
2060       }
2061       for (i=m-1; i>=0; i--){
2062         d  = *(aa + ai[i]);
2063         v  = aa + ai[i] + 1;
2064         vj = aj + ai[i] + 1;
2065         nz = ai[i+1] - ai[i] - 1;
2066         sum = t[i];
2067         while (nz--) sum -= x[*vj++]*(*v++);
2068         PetscLogFlops(2*nz-1);
2069         x[i] =   (1-omega)*x[i] + omega*sum/d;
2070       }
2071     }
2072   }
2073 
2074   ierr = PetscFree(t); CHKERRQ(ierr);
2075   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2076   if (bb != xx) {
2077     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2078   }
2079 
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 
2084 
2085 
2086 
2087 
2088