xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision d3e5a4aba4ec374cad326282c079b2a7eecda4bb)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
77   int         i,n = a->mbs,ierr;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
124   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
125   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
126   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
127 
128   if (a->inew){
129     ierr = PetscFree(a->inew);CHKERRQ(ierr);
130     a->inew = 0;
131   }
132   ierr = PetscFree(a);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
139 {
140   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
141 
142   PetscFunctionBegin;
143   switch (op) {
144   case MAT_ROW_ORIENTED:
145     a->roworiented    = PETSC_TRUE;
146     break;
147   case MAT_COLUMN_ORIENTED:
148     a->roworiented    = PETSC_FALSE;
149     break;
150   case MAT_COLUMNS_SORTED:
151     a->sorted         = PETSC_TRUE;
152     break;
153   case MAT_COLUMNS_UNSORTED:
154     a->sorted         = PETSC_FALSE;
155     break;
156   case MAT_KEEP_ZEROED_ROWS:
157     a->keepzeroedrows = PETSC_TRUE;
158     break;
159   case MAT_NO_NEW_NONZERO_LOCATIONS:
160     a->nonew          = 1;
161     break;
162   case MAT_NEW_NONZERO_LOCATION_ERR:
163     a->nonew          = -1;
164     break;
165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
166     a->nonew          = -2;
167     break;
168   case MAT_YES_NEW_NONZERO_LOCATIONS:
169     a->nonew          = 0;
170     break;
171   case MAT_ROWS_SORTED:
172   case MAT_ROWS_UNSORTED:
173   case MAT_YES_NEW_DIAGONALS:
174   case MAT_IGNORE_OFF_PROC_ENTRIES:
175   case MAT_USE_HASH_TABLE:
176   case MAT_USE_SINGLE_PRECISION_SOLVES:
177     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
178     break;
179   case MAT_NO_NEW_DIAGONALS:
180     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
181   default:
182     SETERRQ(PETSC_ERR_SUP,"unknown option");
183   }
184   PetscFunctionReturn(0);
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
190 {
191   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
192   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
193   MatScalar    *aa,*aa_i;
194   PetscScalar  *v_i;
195 
196   PetscFunctionBegin;
197   bs  = a->bs;
198   ai  = a->i;
199   aj  = a->j;
200   aa  = a->a;
201   bs2 = a->bs2;
202 
203   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
204 
205   bn  = row/bs;   /* Block number */
206   bp  = row % bs; /* Block position */
207   M   = ai[bn+1] - ai[bn];
208   *ncols = bs*M;
209 
210   if (v) {
211     *v = 0;
212     if (*ncols) {
213       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
214       for (i=0; i<M; i++) { /* for each block in the block row */
215         v_i  = *v + i*bs;
216         aa_i = aa + bs2*(ai[bn] + i);
217         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
218       }
219     }
220   }
221 
222   if (cols) {
223     *cols = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         cols_i = *cols + i*bs;
228         itmp  = bs*aj[ai[bn] + i];
229         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
230       }
231     }
232   }
233 
234   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
235   /* this segment is currently removed, so only entries in the upper triangle are obtained */
236 #ifdef column_search
237   v_i    = *v    + M*bs;
238   cols_i = *cols + M*bs;
239   for (i=0; i<bn; i++){ /* for each block row */
240     M = ai[i+1] - ai[i];
241     for (j=0; j<M; j++){
242       itmp = aj[ai[i] + j];    /* block column value */
243       if (itmp == bn){
244         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
245         for (k=0; k<bs; k++) {
246           *cols_i++ = i*bs+k;
247           *v_i++    = aa_i[k];
248         }
249         *ncols += bs;
250         break;
251       }
252     }
253   }
254 #endif
255 
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
262 {
263   int ierr;
264 
265   PetscFunctionBegin;
266   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
267   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
274 {
275   int ierr;
276   PetscFunctionBegin;
277   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
284 {
285   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
286   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
287   PetscScalar  *aa;
288   FILE         *file;
289 
290   PetscFunctionBegin;
291   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
292   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
293   col_lens[0] = MAT_FILE_COOKIE;
294 
295   col_lens[1] = A->m;
296   col_lens[2] = A->m;
297   col_lens[3] = a->s_nz*bs2;
298 
299   /* store lengths of each row and write (including header) to file */
300   count = 0;
301   for (i=0; i<a->mbs; i++) {
302     for (j=0; j<bs; j++) {
303       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
304     }
305   }
306   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
307   ierr = PetscFree(col_lens);CHKERRQ(ierr);
308 
309   /* store column indices (zero start index) */
310   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       for (k=a->i[i]; k<a->i[i+1]; k++) {
315         for (l=0; l<bs; l++) {
316           jj[count++] = bs*a->j[k] + l;
317         }
318       }
319     }
320   }
321   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
322   ierr = PetscFree(jj);CHKERRQ(ierr);
323 
324   /* store nonzero values */
325   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
326   count = 0;
327   for (i=0; i<a->mbs; i++) {
328     for (j=0; j<bs; j++) {
329       for (k=a->i[i]; k<a->i[i+1]; k++) {
330         for (l=0; l<bs; l++) {
331           aa[count++] = a->a[bs2*k + l*bs + j];
332         }
333       }
334     }
335   }
336   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
337   ierr = PetscFree(aa);CHKERRQ(ierr);
338 
339   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
340   if (file) {
341     fprintf(file,"-matload_block_size %d\n",a->bs);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
349 {
350   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
351   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
352   char              *name;
353   PetscViewerFormat format;
354 
355   PetscFunctionBegin;
356   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
357   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
358   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
359     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
360   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
361     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
362   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
364     for (i=0; i<a->mbs; i++) {
365       for (j=0; j<bs; j++) {
366         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
367         for (k=a->i[i]; k<a->i[i+1]; k++) {
368           for (l=0; l<bs; l++) {
369 #if defined(PETSC_USE_COMPLEX)
370             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
371               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
372                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
373             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
374               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
375                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
377               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             }
379 #else
380             if (a->a[bs2*k + l*bs + j] != 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
382             }
383 #endif
384           }
385         }
386         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
387       }
388     }
389     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
390   } else {
391     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
392     for (i=0; i<a->mbs; i++) {
393       for (j=0; j<bs; j++) {
394         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
395         for (k=a->i[i]; k<a->i[i+1]; k++) {
396           for (l=0; l<bs; l++) {
397 #if defined(PETSC_USE_COMPLEX)
398             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
399               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
400                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
401             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
402               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
403                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
404             } else {
405               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             }
407 #else
408             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
409 #endif
410           }
411         }
412         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
413       }
414     }
415     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
416   }
417   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
424 {
425   Mat          A = (Mat) Aa;
426   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
427   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
428   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
429   MatScalar    *aa;
430   MPI_Comm     comm;
431   PetscViewer  viewer;
432 
433   PetscFunctionBegin;
434   /*
435       This is nasty. If this is called from an originally parallel matrix
436    then all processes call this,but only the first has the matrix so the
437    rest should return immediately.
438   */
439   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
440   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
441   if (rank) PetscFunctionReturn(0);
442 
443   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
444 
445   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
446   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
447 
448   /* loop over matrix elements drawing boxes */
449   color = PETSC_DRAW_BLUE;
450   for (i=0,row=0; i<mbs; i++,row+=bs) {
451     for (j=a->i[i]; j<a->i[i+1]; j++) {
452       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
453       x_l = a->j[j]*bs; x_r = x_l + 1.0;
454       aa = a->a + j*bs2;
455       for (k=0; k<bs; k++) {
456         for (l=0; l<bs; l++) {
457           if (PetscRealPart(*aa++) >=  0.) continue;
458           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
459         }
460       }
461     }
462   }
463   color = PETSC_DRAW_CYAN;
464   for (i=0,row=0; i<mbs; i++,row+=bs) {
465     for (j=a->i[i]; j<a->i[i+1]; j++) {
466       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
467       x_l = a->j[j]*bs; x_r = x_l + 1.0;
468       aa = a->a + j*bs2;
469       for (k=0; k<bs; k++) {
470         for (l=0; l<bs; l++) {
471           if (PetscRealPart(*aa++) != 0.) continue;
472           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
473         }
474       }
475     }
476   }
477 
478   color = PETSC_DRAW_RED;
479   for (i=0,row=0; i<mbs; i++,row+=bs) {
480     for (j=a->i[i]; j<a->i[i+1]; j++) {
481       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
482       x_l = a->j[j]*bs; x_r = x_l + 1.0;
483       aa = a->a + j*bs2;
484       for (k=0; k<bs; k++) {
485         for (l=0; l<bs; l++) {
486           if (PetscRealPart(*aa++) <= 0.) continue;
487           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
498 {
499   int          ierr;
500   PetscReal    xl,yl,xr,yr,w,h;
501   PetscDraw    draw;
502   PetscTruth   isnull;
503 
504   PetscFunctionBegin;
505 
506   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
507   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
508 
509   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
510   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
511   xr += w;    yr += h;  xl = -w;     yl = -h;
512   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
513   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatView_SeqSBAIJ"
520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
521 {
522   int        ierr;
523   PetscTruth issocket,isascii,isbinary,isdraw;
524 
525   PetscFunctionBegin;
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
530   if (issocket) {
531     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
532   } else if (isascii){
533     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
534   } else if (isbinary) {
535     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
536   } else if (isdraw) {
537     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
538   } else {
539     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
548 {
549   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
550   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
551   int        *ai = a->i,*ailen = a->ilen;
552   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
553   MatScalar  *ap,*aa = a->a,zero = 0.0;
554 
555   PetscFunctionBegin;
556   for (k=0; k<m; k++) { /* loop over rows */
557     row  = im[k]; brow = row/bs;
558     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
559     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
560     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
561     nrow = ailen[brow];
562     for (l=0; l<n; l++) { /* loop over columns */
563       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
564       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
565       col  = in[l] ;
566       bcol = col/bs;
567       cidx = col%bs;
568       ridx = row%bs;
569       high = nrow;
570       low  = 0; /* assume unsorted */
571       while (high-low > 5) {
572         t = (low+high)/2;
573         if (rp[t] > bcol) high = t;
574         else             low  = t;
575       }
576       for (i=low; i<high; i++) {
577         if (rp[i] > bcol) break;
578         if (rp[i] == bcol) {
579           *v++ = ap[bs2*i+bs*cidx+ridx];
580           goto finished;
581         }
582       }
583       *v++ = zero;
584       finished:;
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
594 {
595   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
596   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
597   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
598   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
599   PetscTruth  roworiented=a->roworiented;
600   MatScalar   *value = v,*ap,*aa = a->a,*bap;
601 
602   PetscFunctionBegin;
603   if (roworiented) {
604     stepval = (n-1)*bs;
605   } else {
606     stepval = (m-1)*bs;
607   }
608   for (k=0; k<m; k++) { /* loop over added rows */
609     row  = im[k];
610     if (row < 0) continue;
611 #if defined(PETSC_USE_BOPT_g)
612     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
613 #endif
614     rp   = aj + ai[row];
615     ap   = aa + bs2*ai[row];
616     rmax = imax[row];
617     nrow = ailen[row];
618     low  = 0;
619     for (l=0; l<n; l++) { /* loop over added columns */
620       if (in[l] < 0) continue;
621 #if defined(PETSC_USE_BOPT_g)
622       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
623 #endif
624       col = in[l];
625       if (col < row) continue; /* ignore lower triangular block */
626       if (roworiented) {
627         value = v + k*(stepval+bs)*bs + l*bs;
628       } else {
629         value = v + l*(stepval+bs)*bs + k*bs;
630       }
631       if (!sorted) low = 0; high = nrow;
632       while (high-low > 7) {
633         t = (low+high)/2;
634         if (rp[t] > col) high = t;
635         else             low  = t;
636       }
637       for (i=low; i<high; i++) {
638         if (rp[i] > col) break;
639         if (rp[i] == col) {
640           bap  = ap +  bs2*i;
641           if (roworiented) {
642             if (is == ADD_VALUES) {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] += *value++;
646                 }
647               }
648             } else {
649               for (ii=0; ii<bs; ii++,value+=stepval) {
650                 for (jj=ii; jj<bs2; jj+=bs) {
651                   bap[jj] = *value++;
652                 }
653               }
654             }
655           } else {
656             if (is == ADD_VALUES) {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++ += *value++;
660                 }
661               }
662             } else {
663               for (ii=0; ii<bs; ii++,value+=stepval) {
664                 for (jj=0; jj<bs; jj++) {
665                   *bap++  = *value++;
666                 }
667               }
668             }
669           }
670           goto noinsert2;
671         }
672       }
673       if (nonew == 1) goto noinsert2;
674       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
675       if (nrow >= rmax) {
676         /* there is no extra room in row, therefore enlarge */
677         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
678         MatScalar *new_a;
679 
680         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
681 
682         /* malloc new storage space */
683         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
684 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
685         new_j   = (int*)(new_a + bs2*new_nz);
686         new_i   = new_j + new_nz;
687 
688         /* copy over old data into new slots */
689         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
690         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
691         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
692         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
693         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
694         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
695         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
697         /* free up old matrix storage */
698         ierr = PetscFree(a->a);CHKERRQ(ierr);
699         if (!a->singlemalloc) {
700           ierr = PetscFree(a->i);CHKERRQ(ierr);
701           ierr = PetscFree(a->j);CHKERRQ(ierr);
702         }
703         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
704         a->singlemalloc = PETSC_TRUE;
705 
706         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
707         rmax = imax[row] = imax[row] + CHUNKSIZE;
708         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
709         a->s_maxnz += bs2*CHUNKSIZE;
710         a->reallocs++;
711         a->s_nz++;
712       }
713       N = nrow++ - 1;
714       /* shift up all the later entries in this row */
715       for (ii=N; ii>=i; ii--) {
716         rp[ii+1] = rp[ii];
717         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719       if (N >= i) {
720         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
721       }
722       rp[i] = col;
723       bap   = ap +  bs2*i;
724       if (roworiented) {
725         for (ii=0; ii<bs; ii++,value+=stepval) {
726           for (jj=ii; jj<bs2; jj+=bs) {
727             bap[jj] = *value++;
728           }
729         }
730       } else {
731         for (ii=0; ii<bs; ii++,value+=stepval) {
732           for (jj=0; jj<bs; jj++) {
733             *bap++  = *value++;
734           }
735         }
736       }
737       noinsert2:;
738       low = i;
739     }
740     ailen[row] = nrow;
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
748 {
749   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
750   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
751   int        m = A->m,*ip,N,*ailen = a->ilen;
752   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
753   MatScalar  *aa = a->a,*ap;
754 #if defined(PETSC_HAVE_SPOOLES)
755   PetscTruth   flag;
756 #endif
757 
758   PetscFunctionBegin;
759   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
760 
761   if (m) rmax = ailen[0];
762   for (i=1; i<mbs; i++) {
763     /* move each row back by the amount of empty slots (fshift) before it*/
764     fshift += imax[i-1] - ailen[i-1];
765     rmax   = PetscMax(rmax,ailen[i]);
766     if (fshift) {
767       ip = aj + ai[i]; ap = aa + bs2*ai[i];
768       N = ailen[i];
769       for (j=0; j<N; j++) {
770         ip[j-fshift] = ip[j];
771         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
772       }
773     }
774     ai[i] = ai[i-1] + ailen[i-1];
775   }
776   if (mbs) {
777     fshift += imax[mbs-1] - ailen[mbs-1];
778     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
779   }
780   /* reset ilen and imax for each row */
781   for (i=0; i<mbs; i++) {
782     ailen[i] = imax[i] = ai[i+1] - ai[i];
783   }
784   a->s_nz = ai[mbs];
785 
786   /* diagonals may have moved, reset it */
787   if (a->diag) {
788     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
789   }
790   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
791            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
793            a->reallocs);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
795   a->reallocs          = 0;
796   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
797 
798 #if defined(PETSC_HAVE_SPOOLES)
799   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
800   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
801 #endif
802 
803   PetscFunctionReturn(0);
804 }
805 
806 /*
807    This function returns an array of flags which indicate the locations of contiguous
808    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
809    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
810    Assume: sizes should be long enough to hold all the values.
811 */
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
815 {
816   int        i,j,k,row;
817   PetscTruth flg;
818 
819   PetscFunctionBegin;
820   for (i=0,j=0; i<n; j++) {
821     row = idx[i];
822     if (row%bs!=0) { /* Not the begining of a block */
823       sizes[j] = 1;
824       i++;
825     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
826       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
827       i++;
828     } else { /* Begining of the block, so check if the complete block exists */
829       flg = PETSC_TRUE;
830       for (k=1; k<bs; k++) {
831         if (row+k != idx[i+k]) { /* break in the block */
832           flg = PETSC_FALSE;
833           break;
834         }
835       }
836       if (flg == PETSC_TRUE) { /* No break in the bs */
837         sizes[j] = bs;
838         i+= bs;
839       } else {
840         sizes[j] = 1;
841         i++;
842       }
843     }
844   }
845   *bs_max = j;
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
852 {
853   PetscFunctionBegin;
854   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
855 }
856 
857 /* Only add/insert a(i,j) with i<=j (blocks).
858    Any a(i,j) with i>j input by user is ingored.
859 */
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
864 {
865   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
866   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
867   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
868   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
869   int         ridx,cidx,bs2=a->bs2,ierr;
870   MatScalar   *ap,value,*aa=a->a,*bap;
871 
872   PetscFunctionBegin;
873 
874   for (k=0; k<m; k++) { /* loop over added rows */
875     row  = im[k];       /* row number */
876     brow = row/bs;      /* block row number */
877     if (row < 0) continue;
878 #if defined(PETSC_USE_BOPT_g)
879     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
880 #endif
881     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
882     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
883     rmax = imax[brow];  /* maximum space allocated for this row */
884     nrow = ailen[brow]; /* actual length of this row */
885     low  = 0;
886 
887     for (l=0; l<n; l++) { /* loop over added columns */
888       if (in[l] < 0) continue;
889 #if defined(PETSC_USE_BOPT_g)
890       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
891 #endif
892       col = in[l];
893       bcol = col/bs;              /* block col number */
894 
895       if (brow <= bcol){
896         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
897         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
898           /* element value a(k,l) */
899           if (roworiented) {
900             value = v[l + k*n];
901           } else {
902             value = v[k + l*m];
903           }
904 
905           /* move pointer bap to a(k,l) quickly and add/insert value */
906           if (!sorted) low = 0; high = nrow;
907           while (high-low > 7) {
908             t = (low+high)/2;
909             if (rp[t] > bcol) high = t;
910             else              low  = t;
911           }
912           for (i=low; i<high; i++) {
913             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
914             if (rp[i] > bcol) break;
915             if (rp[i] == bcol) {
916               bap  = ap +  bs2*i + bs*cidx + ridx;
917               if (is == ADD_VALUES) *bap += value;
918               else                  *bap  = value;
919               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
920               if (brow == bcol && ridx < cidx){
921                 bap  = ap +  bs2*i + bs*ridx + cidx;
922                 if (is == ADD_VALUES) *bap += value;
923                 else                  *bap  = value;
924               }
925               goto noinsert1;
926             }
927           }
928 
929       if (nonew == 1) goto noinsert1;
930       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931       if (nrow >= rmax) {
932         /* there is no extra room in row, therefore enlarge */
933         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
934         MatScalar *new_a;
935 
936         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
937 
938         /* Malloc new storage space */
939         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
940         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
941         new_j = (int*)(new_a + bs2*new_nz);
942         new_i = new_j + new_nz;
943 
944         /* copy over old data into new slots */
945         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
946         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
947         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
948         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
949         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
950         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
951         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
952         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
953         /* free up old matrix storage */
954         ierr = PetscFree(a->a);CHKERRQ(ierr);
955         if (!a->singlemalloc) {
956           ierr = PetscFree(a->i);CHKERRQ(ierr);
957           ierr = PetscFree(a->j);CHKERRQ(ierr);
958         }
959         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
960         a->singlemalloc = PETSC_TRUE;
961 
962         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
963         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
964         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
965         a->s_maxnz += bs2*CHUNKSIZE;
966         a->reallocs++;
967         a->s_nz++;
968       }
969 
970       N = nrow++ - 1;
971       /* shift up all the later entries in this row */
972       for (ii=N; ii>=i; ii--) {
973         rp[ii+1] = rp[ii];
974         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
975       }
976       if (N>=i) {
977         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
978       }
979       rp[i]                      = bcol;
980       ap[bs2*i + bs*cidx + ridx] = value;
981       noinsert1:;
982       low = i;
983       /* } */
984         }
985       } /* end of if .. if..  */
986     }                     /* end of loop over added columns */
987     ailen[brow] = nrow;
988   }                       /* end of loop over added rows */
989 
990   PetscFunctionReturn(0);
991 }
992 
993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1006 extern int MatZeroEntries_SeqSBAIJ(Mat);
1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1008 
1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1024 
1025 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1033 
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1042 
1043 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1051 
1052 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1060 
1061 #ifdef HAVE_MatICCFactor
1062 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1065 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
1066 {
1067   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1068   Mat         outA;
1069   int         ierr;
1070   PetscTruth  row_identity,col_identity;
1071 
1072   PetscFunctionBegin;
1073   /*
1074   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1075   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1076   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1077   if (!row_identity || !col_identity) {
1078     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1079   }
1080   */
1081 
1082   outA          = inA;
1083   inA->factor   = FACTOR_CHOLESKY;
1084 
1085   if (!a->diag) {
1086     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1087   }
1088   /*
1089       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1090       for ILU(0) factorization with natural ordering
1091   */
1092   switch (a->bs) {
1093   case 1:
1094     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1095     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1096     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1097   case 2:
1098     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1099     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1100     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1101     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1102     break;
1103   case 3:
1104     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1105     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1106     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1107     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1108     break;
1109   case 4:
1110     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1111     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1112     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1113     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1114     break;
1115   case 5:
1116     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1117     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1118     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1119     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1120     break;
1121   case 6:
1122     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1123     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1124     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1125     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1126     break;
1127   case 7:
1128     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1129     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1130     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1131     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1132     break;
1133   default:
1134     a->row        = row;
1135     a->icol       = col;
1136     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1137     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1138 
1139     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1140     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1141     PetscLogObjectParent(inA,a->icol);
1142 
1143     if (!a->solve_work) {
1144       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1145       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1146     }
1147   }
1148 
1149   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1150 
1151   PetscFunctionReturn(0);
1152 }
1153 #endif
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1157 int MatPrintHelp_SeqSBAIJ(Mat A)
1158 {
1159   static PetscTruth called = PETSC_FALSE;
1160   MPI_Comm          comm = A->comm;
1161   int               ierr;
1162 
1163   PetscFunctionBegin;
1164   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1165   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1166   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 EXTERN_C_BEGIN
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1173 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1174 {
1175   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1176   int         i,nz,n;
1177 
1178   PetscFunctionBegin;
1179   nz = baij->s_maxnz;
1180   n  = mat->n;
1181   for (i=0; i<nz; i++) {
1182     baij->j[i] = indices[i];
1183   }
1184   baij->s_nz = nz;
1185   for (i=0; i<n; i++) {
1186     baij->ilen[i] = baij->imax[i];
1187   }
1188 
1189   PetscFunctionReturn(0);
1190 }
1191 EXTERN_C_END
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1195 /*@
1196     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1197        in the matrix.
1198 
1199   Input Parameters:
1200 +  mat     - the SeqSBAIJ matrix
1201 -  indices - the column indices
1202 
1203   Level: advanced
1204 
1205   Notes:
1206     This can be called if you have precomputed the nonzero structure of the
1207   matrix and want to provide it to the matrix object to improve the performance
1208   of the MatSetValues() operation.
1209 
1210     You MUST have set the correct numbers of nonzeros per row in the call to
1211   MatCreateSeqSBAIJ().
1212 
1213     MUST be called before any calls to MatSetValues()
1214 
1215 .seealso: MatCreateSeqSBAIJ
1216 @*/
1217 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1218 {
1219   int ierr,(*f)(Mat,int *);
1220 
1221   PetscFunctionBegin;
1222   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1223   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1224   if (f) {
1225     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1226   } else {
1227     SETERRQ(1,"Wrong type of matrix to set column indices");
1228   }
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1234 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1235 {
1236   int        ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1245 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array)
1246 {
1247   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1248   PetscFunctionBegin;
1249   *array = a->a;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1255 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array)
1256 {
1257   PetscFunctionBegin;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #include "petscblaslapack.h"
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1264 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1265 {
1266   int          ierr,one=1;
1267   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1268 
1269   PetscFunctionBegin;
1270   if (str == SAME_NONZERO_PATTERN) {
1271     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1272   } else {
1273     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 /* -------------------------------------------------------------------*/
1279 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1280        MatGetRow_SeqSBAIJ,
1281        MatRestoreRow_SeqSBAIJ,
1282        MatMult_SeqSBAIJ_N,
1283        MatMultAdd_SeqSBAIJ_N,
1284        MatMultTranspose_SeqSBAIJ,
1285        MatMultTransposeAdd_SeqSBAIJ,
1286        MatSolve_SeqSBAIJ_N,
1287        0,
1288        0,
1289        0,
1290        0,
1291        MatCholeskyFactor_SeqSBAIJ,
1292        MatRelax_SeqSBAIJ,
1293        MatTranspose_SeqSBAIJ,
1294        MatGetInfo_SeqSBAIJ,
1295        MatEqual_SeqSBAIJ,
1296        MatGetDiagonal_SeqSBAIJ,
1297        MatDiagonalScale_SeqSBAIJ,
1298        MatNorm_SeqSBAIJ,
1299        0,
1300        MatAssemblyEnd_SeqSBAIJ,
1301        0,
1302        MatSetOption_SeqSBAIJ,
1303        MatZeroEntries_SeqSBAIJ,
1304        MatZeroRows_SeqSBAIJ,
1305        0,
1306        0,
1307        MatCholeskyFactorSymbolic_SeqSBAIJ,
1308        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1309        MatSetUpPreallocation_SeqSBAIJ,
1310        0,
1311        MatICCFactorSymbolic_SeqSBAIJ,
1312        MatGetArray_SeqSBAIJ,
1313        MatRestoreArray_SeqSBAIJ,
1314        MatDuplicate_SeqSBAIJ,
1315        0,
1316        0,
1317        0,
1318        0,
1319        MatAXPY_SeqSBAIJ,
1320        MatGetSubMatrices_SeqSBAIJ,
1321        MatIncreaseOverlap_SeqSBAIJ,
1322        MatGetValues_SeqSBAIJ,
1323        0,
1324        MatPrintHelp_SeqSBAIJ,
1325        MatScale_SeqSBAIJ,
1326        0,
1327        0,
1328        0,
1329        MatGetBlockSize_SeqSBAIJ,
1330        MatGetRowIJ_SeqSBAIJ,
1331        MatRestoreRowIJ_SeqSBAIJ,
1332        0,
1333        0,
1334        0,
1335        0,
1336        0,
1337        0,
1338        MatSetValuesBlocked_SeqSBAIJ,
1339        MatGetSubMatrix_SeqSBAIJ,
1340        0,
1341        0,
1342        MatGetPetscMaps_Petsc,
1343        0,
1344        0,
1345        0,
1346        0,
1347        0,
1348        0,
1349        MatGetRowMax_SeqSBAIJ};
1350 
1351 EXTERN_C_BEGIN
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1354 int MatStoreValues_SeqSBAIJ(Mat mat)
1355 {
1356   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1357   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1358   int         ierr;
1359 
1360   PetscFunctionBegin;
1361   if (aij->nonew != 1) {
1362     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1363   }
1364 
1365   /* allocate space for values if not already there */
1366   if (!aij->saved_values) {
1367     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1368   }
1369 
1370   /* copy values over */
1371   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 EXTERN_C_END
1375 
1376 EXTERN_C_BEGIN
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1379 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1380 {
1381   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1382   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1383 
1384   PetscFunctionBegin;
1385   if (aij->nonew != 1) {
1386     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1387   }
1388   if (!aij->saved_values) {
1389     SETERRQ(1,"Must call MatStoreValues(A);first");
1390   }
1391 
1392   /* copy values over */
1393   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 EXTERN_C_END
1397 
1398 EXTERN_C_BEGIN
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1401 int MatCreate_SeqSBAIJ(Mat B)
1402 {
1403   Mat_SeqSBAIJ *b;
1404   int          ierr,size;
1405 
1406   PetscFunctionBegin;
1407   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1408   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1409   B->m = B->M = PetscMax(B->m,B->M);
1410   B->n = B->N = PetscMax(B->n,B->N);
1411 
1412   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1413   B->data = (void*)b;
1414   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1415   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1416   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1417   B->ops->view        = MatView_SeqSBAIJ;
1418   B->factor           = 0;
1419   B->lupivotthreshold = 1.0;
1420   B->mapping          = 0;
1421   b->row              = 0;
1422   b->icol             = 0;
1423   b->reallocs         = 0;
1424   b->saved_values     = 0;
1425 
1426   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1427   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1428 
1429   b->sorted           = PETSC_FALSE;
1430   b->roworiented      = PETSC_TRUE;
1431   b->nonew            = 0;
1432   b->diag             = 0;
1433   b->solve_work       = 0;
1434   b->mult_work        = 0;
1435   B->spptr            = 0;
1436   b->keepzeroedrows   = PETSC_FALSE;
1437 
1438   b->inew             = 0;
1439   b->jnew             = 0;
1440   b->anew             = 0;
1441   b->a2anew           = 0;
1442   b->permute          = PETSC_FALSE;
1443 
1444   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1445                                      "MatStoreValues_SeqSBAIJ",
1446                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1448                                      "MatRetrieveValues_SeqSBAIJ",
1449                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1450   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1451                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1452                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1453   PetscFunctionReturn(0);
1454 }
1455 EXTERN_C_END
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1459 /*@C
1460    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1461    compressed row) format.  For good matrix assembly performance the
1462    user should preallocate the matrix storage by setting the parameter nz
1463    (or the array nnz).  By setting these parameters accurately, performance
1464    during matrix assembly can be increased by more than a factor of 50.
1465 
1466    Collective on Mat
1467 
1468    Input Parameters:
1469 +  A - the symmetric matrix
1470 .  bs - size of block
1471 .  nz - number of block nonzeros per block row (same for all rows)
1472 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1473          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1474 
1475    Options Database Keys:
1476 .   -mat_no_unroll - uses code that does not unroll the loops in the
1477                      block calculations (much slower)
1478 .    -mat_block_size - size of the blocks to use
1479 
1480    Level: intermediate
1481 
1482    Notes:
1483    Specify the preallocated storage with either nz or nnz (not both).
1484    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1485    allocation.  For additional details, see the users manual chapter on
1486    matrices.
1487 
1488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1489 @*/
1490 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1491 {
1492   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1493   int          i,len,ierr,mbs,bs2;
1494   PetscTruth   flg;
1495   int          s_nz;
1496 
1497   PetscFunctionBegin;
1498   B->preallocated = PETSC_TRUE;
1499   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1500   mbs  = B->m/bs;
1501   bs2  = bs*bs;
1502 
1503   if (mbs*bs != B->m) {
1504     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1505   }
1506 
1507   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1508   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1509   if (nnz) {
1510     for (i=0; i<mbs; i++) {
1511       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1512       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1513     }
1514   }
1515 
1516   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1517   if (!flg) {
1518     switch (bs) {
1519     case 1:
1520       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1521       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1522       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1523       B->ops->mult            = MatMult_SeqSBAIJ_1;
1524       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1525       break;
1526     case 2:
1527       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1528       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1529       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1530       B->ops->mult            = MatMult_SeqSBAIJ_2;
1531       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1532       break;
1533     case 3:
1534       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1535       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1536       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1537       B->ops->mult            = MatMult_SeqSBAIJ_3;
1538       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1539       break;
1540     case 4:
1541       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1542       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1543       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1544       B->ops->mult            = MatMult_SeqSBAIJ_4;
1545       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1546       break;
1547     case 5:
1548       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1549       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1550       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1551       B->ops->mult            = MatMult_SeqSBAIJ_5;
1552       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1553       break;
1554     case 6:
1555       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1556       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1557       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1558       B->ops->mult            = MatMult_SeqSBAIJ_6;
1559       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1560       break;
1561     case 7:
1562       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1563       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1564       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1565       B->ops->mult            = MatMult_SeqSBAIJ_7;
1566       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1567       break;
1568     }
1569   }
1570 
1571   b->mbs = mbs;
1572   b->nbs = mbs;
1573   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1574   if (!nnz) {
1575     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1576     else if (nz <= 0)        nz = 1;
1577     for (i=0; i<mbs; i++) {
1578       b->imax[i] = nz;
1579     }
1580     nz = nz*mbs; /* total nz */
1581   } else {
1582     nz = 0;
1583     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1584   }
1585   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1586   s_nz = nz;
1587 
1588   /* allocate the matrix space */
1589   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1590   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1591   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1592   b->j = (int*)(b->a + s_nz*bs2);
1593   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1594   b->i = b->j + s_nz;
1595   b->singlemalloc = PETSC_TRUE;
1596 
1597   /* pointer to beginning of each row */
1598   b->i[0] = 0;
1599   for (i=1; i<mbs+1; i++) {
1600     b->i[i] = b->i[i-1] + b->imax[i-1];
1601   }
1602 
1603   /* b->ilen will count nonzeros in each block row so far. */
1604   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1605   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1606   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1607 
1608   b->bs               = bs;
1609   b->bs2              = bs2;
1610   b->s_nz             = 0;
1611   b->s_maxnz          = s_nz*bs2;
1612 
1613   b->inew             = 0;
1614   b->jnew             = 0;
1615   b->anew             = 0;
1616   b->a2anew           = 0;
1617   b->permute          = PETSC_FALSE;
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatCreateSeqSBAIJ"
1624 /*@C
1625    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1626    compressed row) format.  For good matrix assembly performance the
1627    user should preallocate the matrix storage by setting the parameter nz
1628    (or the array nnz).  By setting these parameters accurately, performance
1629    during matrix assembly can be increased by more than a factor of 50.
1630 
1631    Collective on MPI_Comm
1632 
1633    Input Parameters:
1634 +  comm - MPI communicator, set to PETSC_COMM_SELF
1635 .  bs - size of block
1636 .  m - number of rows, or number of columns
1637 .  nz - number of block nonzeros per block row (same for all rows)
1638 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1639          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1640 
1641    Output Parameter:
1642 .  A - the symmetric matrix
1643 
1644    Options Database Keys:
1645 .   -mat_no_unroll - uses code that does not unroll the loops in the
1646                      block calculations (much slower)
1647 .    -mat_block_size - size of the blocks to use
1648 
1649    Level: intermediate
1650 
1651    Notes:
1652 
1653    Specify the preallocated storage with either nz or nnz (not both).
1654    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1655    allocation.  For additional details, see the users manual chapter on
1656    matrices.
1657 
1658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1659 @*/
1660 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1661 {
1662   int ierr;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1666   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1667   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1673 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1674 {
1675   Mat         C;
1676   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1677   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1678 
1679   PetscFunctionBegin;
1680   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1681 
1682   *B = 0;
1683   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1684   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1685   c    = (Mat_SeqSBAIJ*)C->data;
1686 
1687   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1688   C->preallocated   = PETSC_TRUE;
1689   C->factor         = A->factor;
1690   c->row            = 0;
1691   c->icol           = 0;
1692   c->saved_values   = 0;
1693   c->keepzeroedrows = a->keepzeroedrows;
1694   C->assembled      = PETSC_TRUE;
1695 
1696   c->bs         = a->bs;
1697   c->bs2        = a->bs2;
1698   c->mbs        = a->mbs;
1699   c->nbs        = a->nbs;
1700 
1701   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1702   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1703   for (i=0; i<mbs; i++) {
1704     c->imax[i] = a->imax[i];
1705     c->ilen[i] = a->ilen[i];
1706   }
1707 
1708   /* allocate the matrix space */
1709   c->singlemalloc = PETSC_TRUE;
1710   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1711   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1712   c->j = (int*)(c->a + nz*bs2);
1713   c->i = c->j + nz;
1714   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1715   if (mbs > 0) {
1716     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1717     if (cpvalues == MAT_COPY_VALUES) {
1718       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1719     } else {
1720       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1721     }
1722   }
1723 
1724   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1725   c->sorted      = a->sorted;
1726   c->roworiented = a->roworiented;
1727   c->nonew       = a->nonew;
1728 
1729   if (a->diag) {
1730     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1731     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1732     for (i=0; i<mbs; i++) {
1733       c->diag[i] = a->diag[i];
1734     }
1735   } else c->diag        = 0;
1736   c->s_nz               = a->s_nz;
1737   c->s_maxnz            = a->s_maxnz;
1738   c->solve_work         = 0;
1739   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1740   c->mult_work          = 0;
1741   *B = C;
1742   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 EXTERN_C_BEGIN
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1749 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1750 {
1751   Mat_SeqSBAIJ *a;
1752   Mat          B;
1753   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1754   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1755   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1756   int          *masked,nmask,tmp,bs2,ishift;
1757   PetscScalar  *aa;
1758   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1759 
1760   PetscFunctionBegin;
1761   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1762   bs2  = bs*bs;
1763 
1764   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1765   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1766   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1767   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1768   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1769   M = header[1]; N = header[2]; nz = header[3];
1770 
1771   if (header[3] < 0) {
1772     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1773   }
1774 
1775   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1776 
1777   /*
1778      This code adds extra rows to make sure the number of rows is
1779     divisible by the blocksize
1780   */
1781   mbs        = M/bs;
1782   extra_rows = bs - M + bs*(mbs);
1783   if (extra_rows == bs) extra_rows = 0;
1784   else                  mbs++;
1785   if (extra_rows) {
1786     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1787   }
1788 
1789   /* read in row lengths */
1790   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1791   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1792   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1793 
1794   /* read in column indices */
1795   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1796   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1797   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1798 
1799   /* loop over row lengths determining block row lengths */
1800   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1801   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1802   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1803   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1804   masked   = mask + mbs;
1805   rowcount = 0; nzcount = 0;
1806   for (i=0; i<mbs; i++) {
1807     nmask = 0;
1808     for (j=0; j<bs; j++) {
1809       kmax = rowlengths[rowcount];
1810       for (k=0; k<kmax; k++) {
1811         tmp = jj[nzcount++]/bs;   /* block col. index */
1812         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1813       }
1814       rowcount++;
1815     }
1816     s_browlengths[i] += nmask;
1817 
1818     /* zero out the mask elements we set */
1819     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1820   }
1821 
1822   /* create our matrix */
1823   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1824   B = *A;
1825   a = (Mat_SeqSBAIJ*)B->data;
1826 
1827   /* set matrix "i" values */
1828   a->i[0] = 0;
1829   for (i=1; i<= mbs; i++) {
1830     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1831     a->ilen[i-1] = s_browlengths[i-1];
1832   }
1833   a->s_nz = a->i[mbs];
1834 
1835   /* read in nonzero values */
1836   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1837   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1838   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1839 
1840   /* set "a" and "j" values into matrix */
1841   nzcount = 0; jcount = 0;
1842   for (i=0; i<mbs; i++) {
1843     nzcountb = nzcount;
1844     nmask    = 0;
1845     for (j=0; j<bs; j++) {
1846       kmax = rowlengths[i*bs+j];
1847       for (k=0; k<kmax; k++) {
1848         tmp = jj[nzcount++]/bs; /* block col. index */
1849         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1850       }
1851     }
1852     /* sort the masked values */
1853     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1854 
1855     /* set "j" values into matrix */
1856     maskcount = 1;
1857     for (j=0; j<nmask; j++) {
1858       a->j[jcount++]  = masked[j];
1859       mask[masked[j]] = maskcount++;
1860     }
1861 
1862     /* set "a" values into matrix */
1863     ishift = bs2*a->i[i];
1864     for (j=0; j<bs; j++) {
1865       kmax = rowlengths[i*bs+j];
1866       for (k=0; k<kmax; k++) {
1867         tmp       = jj[nzcountb]/bs ; /* block col. index */
1868         if (tmp >= i){
1869           block     = mask[tmp] - 1;
1870           point     = jj[nzcountb] - bs*tmp;
1871           idx       = ishift + bs2*block + j + bs*point;
1872           a->a[idx] = aa[nzcountb];
1873         }
1874         nzcountb++;
1875       }
1876     }
1877     /* zero out the mask elements we set */
1878     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1879   }
1880   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1881 
1882   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1883   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1884   ierr = PetscFree(aa);CHKERRQ(ierr);
1885   ierr = PetscFree(jj);CHKERRQ(ierr);
1886   ierr = PetscFree(mask);CHKERRQ(ierr);
1887 
1888   B->assembled = PETSC_TRUE;
1889   ierr = MatView_Private(B);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 EXTERN_C_END
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1896 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1897 {
1898   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1899   MatScalar    *aa=a->a,*v,*v1;
1900   PetscScalar  *x,*b,*t,sum,d;
1901   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1902   int          nz,nz1,*vj,*vj1,i;
1903 
1904   PetscFunctionBegin;
1905   its = its*lits;
1906   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1907 
1908   if (bs > 1)
1909     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1910 
1911   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1912   if (xx != bb) {
1913     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1914   } else {
1915     b = x;
1916   }
1917 
1918   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1919 
1920   if (flag & SOR_ZERO_INITIAL_GUESS) {
1921     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1922       for (i=0; i<m; i++)
1923         t[i] = b[i];
1924 
1925       for (i=0; i<m; i++){
1926         d  = *(aa + ai[i]);  /* diag[i] */
1927         v  = aa + ai[i] + 1;
1928         vj = aj + ai[i] + 1;
1929         nz = ai[i+1] - ai[i] - 1;
1930         x[i] = omega*t[i]/d;
1931         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1932         PetscLogFlops(2*nz-1);
1933       }
1934     }
1935 
1936     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1937       for (i=0; i<m; i++)
1938         t[i] = b[i];
1939 
1940       for (i=0; i<m-1; i++){  /* update rhs */
1941         v  = aa + ai[i] + 1;
1942         vj = aj + ai[i] + 1;
1943         nz = ai[i+1] - ai[i] - 1;
1944         while (nz--) t[*vj++] -= x[i]*(*v++);
1945         PetscLogFlops(2*nz-1);
1946       }
1947       for (i=m-1; i>=0; i--){
1948         d  = *(aa + ai[i]);
1949         v  = aa + ai[i] + 1;
1950         vj = aj + ai[i] + 1;
1951         nz = ai[i+1] - ai[i] - 1;
1952         sum = t[i];
1953         while (nz--) sum -= x[*vj++]*(*v++);
1954         PetscLogFlops(2*nz-1);
1955         x[i] =   (1-omega)*x[i] + omega*sum/d;
1956       }
1957     }
1958     its--;
1959   }
1960 
1961   while (its--) {
1962     /*
1963        forward sweep:
1964        for i=0,...,m-1:
1965          sum[i] = (b[i] - U(i,:)x )/d[i];
1966          x[i]   = (1-omega)x[i] + omega*sum[i];
1967          b      = b - x[i]*U^T(i,:);
1968 
1969     */
1970     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1971       for (i=0; i<m; i++)
1972         t[i] = b[i];
1973 
1974       for (i=0; i<m; i++){
1975         d  = *(aa + ai[i]);  /* diag[i] */
1976         v  = aa + ai[i] + 1; v1=v;
1977         vj = aj + ai[i] + 1; vj1=vj;
1978         nz = ai[i+1] - ai[i] - 1; nz1=nz;
1979         sum = t[i];
1980         while (nz1--) sum -= (*v1++)*x[*vj1++];
1981         x[i] = (1-omega)*x[i] + omega*sum/d;
1982         while (nz--) t[*vj++] -= x[i]*(*v++);
1983         PetscLogFlops(4*nz-2);
1984       }
1985     }
1986 
1987   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1988       /*
1989        backward sweep:
1990        b = b - x[i]*U^T(i,:), i=0,...,n-2
1991        for i=m-1,...,0:
1992          sum[i] = (b[i] - U(i,:)x )/d[i];
1993          x[i]   = (1-omega)x[i] + omega*sum[i];
1994       */
1995       for (i=0; i<m; i++)
1996         t[i] = b[i];
1997 
1998       for (i=0; i<m-1; i++){  /* update rhs */
1999         v  = aa + ai[i] + 1;
2000         vj = aj + ai[i] + 1;
2001         nz = ai[i+1] - ai[i] - 1;
2002         while (nz--) t[*vj++] -= x[i]*(*v++);
2003         PetscLogFlops(2*nz-1);
2004       }
2005       for (i=m-1; i>=0; i--){
2006         d  = *(aa + ai[i]);
2007         v  = aa + ai[i] + 1;
2008         vj = aj + ai[i] + 1;
2009         nz = ai[i+1] - ai[i] - 1;
2010         sum = t[i];
2011         while (nz--) sum -= x[*vj++]*(*v++);
2012         PetscLogFlops(2*nz-1);
2013         x[i] =   (1-omega)*x[i] + omega*sum/d;
2014       }
2015     }
2016   }
2017 
2018   ierr = PetscFree(t); CHKERRQ(ierr);
2019   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2020   if (bb != xx) {
2021     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2022   }
2023 
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 
2028 
2029 
2030 
2031 
2032