xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 42ee4b1a0fe47bea72b3c9fc9e20052152ab983c)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 #if defined(PETSC_HAVE_SPOOLES)
12 EXTERN int MatUseSpooles_SeqSBAIJ(Mat);
13 #endif
14 
15 #define CHUNKSIZE  10
16 
17 /*
18      Checks for missing diagonals
19 */
20 #undef __FUNCT__
21 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
22 int MatMissingDiagonal_SeqSBAIJ(Mat A)
23 {
24   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
25   int         *diag,*jj = a->j,i,ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
29   diag = a->diag;
30   for (i=0; i<a->mbs; i++) {
31     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,mbs = a->mbs,ierr;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
47   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
48   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
49   PetscFunctionReturn(0);
50 }
51 
52 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
56 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
57 {
58   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
59 
60   PetscFunctionBegin;
61   if (ia) {
62     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
63   }
64   *nn = a->mbs;
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
70 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
71 {
72   PetscFunctionBegin;
73   if (!ia) PetscFunctionReturn(0);
74   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
75   /* PetscFunctionReturn(0); */
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
80 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
81 {
82   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
83 
84   PetscFunctionBegin;
85   *bs = sbaij->bs;
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
91 int MatDestroy_SeqSBAIJ(Mat A)
92 {
93   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
94   int         ierr;
95 
96   PetscFunctionBegin;
97 #if defined(PETSC_USE_LOG)
98   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
99 #endif
100   ierr = PetscFree(a->a);CHKERRQ(ierr);
101   if (!a->singlemalloc) {
102     ierr = PetscFree(a->i);CHKERRQ(ierr);
103     ierr = PetscFree(a->j);CHKERRQ(ierr);
104   }
105   if (a->row) {
106     ierr = ISDestroy(a->row);CHKERRQ(ierr);
107   }
108   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
109   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
110   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
111   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
112   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
113   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
114   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
115 
116   if (a->inew){
117     ierr = PetscFree(a->inew);CHKERRQ(ierr);
118     a->inew = 0;
119   }
120   ierr = PetscFree(a);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
126 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
127 {
128   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
129 
130   PetscFunctionBegin;
131   switch (op) {
132   case MAT_ROW_ORIENTED:
133     a->roworiented    = PETSC_TRUE;
134     break;
135   case MAT_COLUMN_ORIENTED:
136     a->roworiented    = PETSC_FALSE;
137     break;
138   case MAT_COLUMNS_SORTED:
139     a->sorted         = PETSC_TRUE;
140     break;
141   case MAT_COLUMNS_UNSORTED:
142     a->sorted         = PETSC_FALSE;
143     break;
144   case MAT_KEEP_ZEROED_ROWS:
145     a->keepzeroedrows = PETSC_TRUE;
146     break;
147   case MAT_NO_NEW_NONZERO_LOCATIONS:
148     a->nonew          = 1;
149     break;
150   case MAT_NEW_NONZERO_LOCATION_ERR:
151     a->nonew          = -1;
152     break;
153   case MAT_NEW_NONZERO_ALLOCATION_ERR:
154     a->nonew          = -2;
155     break;
156   case MAT_YES_NEW_NONZERO_LOCATIONS:
157     a->nonew          = 0;
158     break;
159   case MAT_ROWS_SORTED:
160   case MAT_ROWS_UNSORTED:
161   case MAT_YES_NEW_DIAGONALS:
162   case MAT_IGNORE_OFF_PROC_ENTRIES:
163   case MAT_USE_HASH_TABLE:
164   case MAT_USE_SINGLE_PRECISION_SOLVES:
165     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
166     break;
167   case MAT_NO_NEW_DIAGONALS:
168     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
169   default:
170     SETERRQ(PETSC_ERR_SUP,"unknown option");
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
177 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
178 {
179   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
180   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
181   MatScalar    *aa,*aa_i;
182   PetscScalar  *v_i;
183 
184   PetscFunctionBegin;
185   bs  = a->bs;
186   ai  = a->i;
187   aj  = a->j;
188   aa  = a->a;
189   bs2 = a->bs2;
190 
191   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
192 
193   bn  = row/bs;   /* Block number */
194   bp  = row % bs; /* Block position */
195   M   = ai[bn+1] - ai[bn];
196   *ncols = bs*M;
197 
198   if (v) {
199     *v = 0;
200     if (*ncols) {
201       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
202       for (i=0; i<M; i++) { /* for each block in the block row */
203         v_i  = *v + i*bs;
204         aa_i = aa + bs2*(ai[bn] + i);
205         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
206       }
207     }
208   }
209 
210   if (cols) {
211     *cols = 0;
212     if (*ncols) {
213       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
214       for (i=0; i<M; i++) { /* for each block in the block row */
215         cols_i = *cols + i*bs;
216         itmp  = bs*aj[ai[bn] + i];
217         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
218       }
219     }
220   }
221 
222   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
223   /* this segment is currently removed, so only entries in the upper triangle are obtained */
224 #ifdef column_search
225   v_i    = *v    + M*bs;
226   cols_i = *cols + M*bs;
227   for (i=0; i<bn; i++){ /* for each block row */
228     M = ai[i+1] - ai[i];
229     for (j=0; j<M; j++){
230       itmp = aj[ai[i] + j];    /* block column value */
231       if (itmp == bn){
232         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
233         for (k=0; k<bs; k++) {
234           *cols_i++ = i*bs+k;
235           *v_i++    = aa_i[k];
236         }
237         *ncols += bs;
238         break;
239       }
240     }
241   }
242 #endif
243 
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
249 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
250 {
251   int ierr;
252 
253   PetscFunctionBegin;
254   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
255   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
261 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
262 {
263   int ierr;
264   PetscFunctionBegin;
265   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
271 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
272 {
273   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
274   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
275   PetscScalar  *aa;
276   FILE         *file;
277 
278   PetscFunctionBegin;
279   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
280   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
281   col_lens[0] = MAT_FILE_COOKIE;
282 
283   col_lens[1] = A->m;
284   col_lens[2] = A->m;
285   col_lens[3] = a->s_nz*bs2;
286 
287   /* store lengths of each row and write (including header) to file */
288   count = 0;
289   for (i=0; i<a->mbs; i++) {
290     for (j=0; j<bs; j++) {
291       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
292     }
293   }
294   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
295   ierr = PetscFree(col_lens);CHKERRQ(ierr);
296 
297   /* store column indices (zero start index) */
298   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
299   count = 0;
300   for (i=0; i<a->mbs; i++) {
301     for (j=0; j<bs; j++) {
302       for (k=a->i[i]; k<a->i[i+1]; k++) {
303         for (l=0; l<bs; l++) {
304           jj[count++] = bs*a->j[k] + l;
305         }
306       }
307     }
308   }
309   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
310   ierr = PetscFree(jj);CHKERRQ(ierr);
311 
312   /* store nonzero values */
313   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
314   count = 0;
315   for (i=0; i<a->mbs; i++) {
316     for (j=0; j<bs; j++) {
317       for (k=a->i[i]; k<a->i[i+1]; k++) {
318         for (l=0; l<bs; l++) {
319           aa[count++] = a->a[bs2*k + l*bs + j];
320         }
321       }
322     }
323   }
324   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
325   ierr = PetscFree(aa);CHKERRQ(ierr);
326 
327   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
328   if (file) {
329     fprintf(file,"-matload_block_size %d\n",a->bs);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
336 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
337 {
338   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
339   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
340   char              *name;
341   PetscViewerFormat format;
342 
343   PetscFunctionBegin;
344   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
345   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
346   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
347     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
348   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
349     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
350   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
351     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
352     for (i=0; i<a->mbs; i++) {
353       for (j=0; j<bs; j++) {
354         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
355         for (k=a->i[i]; k<a->i[i+1]; k++) {
356           for (l=0; l<bs; l++) {
357 #if defined(PETSC_USE_COMPLEX)
358             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
359               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
360                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
361             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
362               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
363                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
365               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
366             }
367 #else
368             if (a->a[bs2*k + l*bs + j] != 0.0) {
369               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
370             }
371 #endif
372           }
373         }
374         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
375       }
376     }
377     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
378   } else {
379     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
380     for (i=0; i<a->mbs; i++) {
381       for (j=0; j<bs; j++) {
382         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
383         for (k=a->i[i]; k<a->i[i+1]; k++) {
384           for (l=0; l<bs; l++) {
385 #if defined(PETSC_USE_COMPLEX)
386             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
387               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
388                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
391                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else {
393               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
394             }
395 #else
396             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
397 #endif
398           }
399         }
400         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
401       }
402     }
403     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
404   }
405   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
411 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
412 {
413   Mat          A = (Mat) Aa;
414   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
415   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
416   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
417   MatScalar    *aa;
418   MPI_Comm     comm;
419   PetscViewer  viewer;
420 
421   PetscFunctionBegin;
422   /*
423       This is nasty. If this is called from an originally parallel matrix
424    then all processes call this,but only the first has the matrix so the
425    rest should return immediately.
426   */
427   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
428   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
429   if (rank) PetscFunctionReturn(0);
430 
431   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
432 
433   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
434   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
435 
436   /* loop over matrix elements drawing boxes */
437   color = PETSC_DRAW_BLUE;
438   for (i=0,row=0; i<mbs; i++,row+=bs) {
439     for (j=a->i[i]; j<a->i[i+1]; j++) {
440       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
441       x_l = a->j[j]*bs; x_r = x_l + 1.0;
442       aa = a->a + j*bs2;
443       for (k=0; k<bs; k++) {
444         for (l=0; l<bs; l++) {
445           if (PetscRealPart(*aa++) >=  0.) continue;
446           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
447         }
448       }
449     }
450   }
451   color = PETSC_DRAW_CYAN;
452   for (i=0,row=0; i<mbs; i++,row+=bs) {
453     for (j=a->i[i]; j<a->i[i+1]; j++) {
454       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
455       x_l = a->j[j]*bs; x_r = x_l + 1.0;
456       aa = a->a + j*bs2;
457       for (k=0; k<bs; k++) {
458         for (l=0; l<bs; l++) {
459           if (PetscRealPart(*aa++) != 0.) continue;
460           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465 
466   color = PETSC_DRAW_RED;
467   for (i=0,row=0; i<mbs; i++,row+=bs) {
468     for (j=a->i[i]; j<a->i[i+1]; j++) {
469       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
470       x_l = a->j[j]*bs; x_r = x_l + 1.0;
471       aa = a->a + j*bs2;
472       for (k=0; k<bs; k++) {
473         for (l=0; l<bs; l++) {
474           if (PetscRealPart(*aa++) <= 0.) continue;
475           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
476         }
477       }
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
485 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
486 {
487   int          ierr;
488   PetscReal    xl,yl,xr,yr,w,h;
489   PetscDraw    draw;
490   PetscTruth   isnull;
491 
492   PetscFunctionBegin;
493 
494   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
495   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
496 
497   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
498   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
499   xr += w;    yr += h;  xl = -w;     yl = -h;
500   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
501   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
502   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_SeqSBAIJ"
508 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
509 {
510   int        ierr;
511   PetscTruth issocket,isascii,isbinary,isdraw;
512 
513   PetscFunctionBegin;
514   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
515   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
516   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
517   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
518   if (issocket) {
519     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
520   } else if (isascii){
521     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
522   } else if (isbinary) {
523     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
524   } else if (isdraw) {
525     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
526   } else {
527     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
535 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
536 {
537   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
538   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
539   int        *ai = a->i,*ailen = a->ilen;
540   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
541   MatScalar  *ap,*aa = a->a,zero = 0.0;
542 
543   PetscFunctionBegin;
544   for (k=0; k<m; k++) { /* loop over rows */
545     row  = im[k]; brow = row/bs;
546     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
547     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
548     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
549     nrow = ailen[brow];
550     for (l=0; l<n; l++) { /* loop over columns */
551       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
552       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
553       col  = in[l] ;
554       bcol = col/bs;
555       cidx = col%bs;
556       ridx = row%bs;
557       high = nrow;
558       low  = 0; /* assume unsorted */
559       while (high-low > 5) {
560         t = (low+high)/2;
561         if (rp[t] > bcol) high = t;
562         else             low  = t;
563       }
564       for (i=low; i<high; i++) {
565         if (rp[i] > bcol) break;
566         if (rp[i] == bcol) {
567           *v++ = ap[bs2*i+bs*cidx+ridx];
568           goto finished;
569         }
570       }
571       *v++ = zero;
572       finished:;
573     }
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
581 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
582 {
583   PetscFunctionBegin;
584   SETERRQ(1,"Function not yet written for SBAIJ format");
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
589 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
590 {
591   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
592   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
593   int        m = A->m,*ip,N,*ailen = a->ilen;
594   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
595   MatScalar  *aa = a->a,*ap;
596 #if defined(PETSC_HAVE_SPOOLES)
597   PetscTruth   flag;
598 #endif
599 
600   PetscFunctionBegin;
601   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
602 
603   if (m) rmax = ailen[0];
604   for (i=1; i<mbs; i++) {
605     /* move each row back by the amount of empty slots (fshift) before it*/
606     fshift += imax[i-1] - ailen[i-1];
607     rmax   = PetscMax(rmax,ailen[i]);
608     if (fshift) {
609       ip = aj + ai[i]; ap = aa + bs2*ai[i];
610       N = ailen[i];
611       for (j=0; j<N; j++) {
612         ip[j-fshift] = ip[j];
613         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
614       }
615     }
616     ai[i] = ai[i-1] + ailen[i-1];
617   }
618   if (mbs) {
619     fshift += imax[mbs-1] - ailen[mbs-1];
620     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
621   }
622   /* reset ilen and imax for each row */
623   for (i=0; i<mbs; i++) {
624     ailen[i] = imax[i] = ai[i+1] - ai[i];
625   }
626   a->s_nz = ai[mbs];
627 
628   /* diagonals may have moved, reset it */
629   if (a->diag) {
630     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
631   }
632   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
633            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
634   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
635            a->reallocs);
636   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
637   a->reallocs          = 0;
638   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
639 
640 #if defined(PETSC_HAVE_SPOOLES)
641   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
642   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
643 #endif
644 
645   PetscFunctionReturn(0);
646 }
647 
648 /*
649    This function returns an array of flags which indicate the locations of contiguous
650    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
651    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
652    Assume: sizes should be long enough to hold all the values.
653 */
654 #undef __FUNCT__
655 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
656 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
657 {
658   int        i,j,k,row;
659   PetscTruth flg;
660 
661   PetscFunctionBegin;
662   for (i=0,j=0; i<n; j++) {
663     row = idx[i];
664     if (row%bs!=0) { /* Not the begining of a block */
665       sizes[j] = 1;
666       i++;
667     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
668       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
669       i++;
670     } else { /* Begining of the block, so check if the complete block exists */
671       flg = PETSC_TRUE;
672       for (k=1; k<bs; k++) {
673         if (row+k != idx[i+k]) { /* break in the block */
674           flg = PETSC_FALSE;
675           break;
676         }
677       }
678       if (flg == PETSC_TRUE) { /* No break in the bs */
679         sizes[j] = bs;
680         i+= bs;
681       } else {
682         sizes[j] = 1;
683         i++;
684       }
685     }
686   }
687   *bs_max = j;
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
693 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
694 {
695   Mat_SeqSBAIJ  *sbaij=(Mat_SeqSBAIJ*)A->data;
696   int           ierr,i,j,k,count,is_n,*is_idx,*rows;
697   int           bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
698   PetscScalar   zero = 0.0;
699   MatScalar     *aa;
700 
701   PetscFunctionBegin;
702   /* Make a copy of the IS and  sort it */
703   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
704   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
705 
706   /* allocate memory for rows,sizes */
707   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
708   sizes = rows + is_n;
709 
710   /* initialize copy IS values to rows, and sort them */
711   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
712   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
713   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
714     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
715     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
716   } else {
717     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
718   }
719   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
720 
721   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
722     row   = rows[j];                  /* row to be zeroed */
723     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
724     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
725     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
726     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
727       if (diag) {
728         if (sbaij->ilen[row/bs] > 0) {
729           sbaij->ilen[row/bs] = 1;
730           sbaij->j[sbaij->i[row/bs]] = row/bs;
731           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
732         }
733         /* Now insert all the diagoanl values for this bs */
734         for (k=0; k<bs; k++) {
735           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
736         }
737       } else { /* (!diag) */
738         sbaij->ilen[row/bs] = 0;
739       } /* end (!diag) */
740     } else { /* (sizes[i] != bs), broken block */
741 #if defined (PETSC_USE_DEBUG)
742       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
743 #endif
744       for (k=0; k<count; k++) {
745         aa[0] = zero;
746         aa+=bs;
747       }
748       if (diag) {
749         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
750       }
751     }
752   }
753 
754   ierr = PetscFree(rows);CHKERRQ(ierr);
755   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 /* Only add/insert a(i,j) with i<=j (blocks).
761    Any a(i,j) with i>j input by user is ingored.
762 */
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
766 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
767 {
768   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
769   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
770   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
771   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
772   int         ridx,cidx,bs2=a->bs2,ierr;
773   MatScalar   *ap,value,*aa=a->a,*bap;
774 
775   PetscFunctionBegin;
776 
777   for (k=0; k<m; k++) { /* loop over added rows */
778     row  = im[k];       /* row number */
779     brow = row/bs;      /* block row number */
780     if (row < 0) continue;
781 #if defined(PETSC_USE_BOPT_g)
782     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
783 #endif
784     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
785     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
786     rmax = imax[brow];  /* maximum space allocated for this row */
787     nrow = ailen[brow]; /* actual length of this row */
788     low  = 0;
789 
790     for (l=0; l<n; l++) { /* loop over added columns */
791       if (in[l] < 0) continue;
792 #if defined(PETSC_USE_BOPT_g)
793       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
794 #endif
795       col = in[l];
796       bcol = col/bs;              /* block col number */
797 
798       if (brow <= bcol){
799         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
800         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
801           /* element value a(k,l) */
802           if (roworiented) {
803             value = v[l + k*n];
804           } else {
805             value = v[k + l*m];
806           }
807 
808           /* move pointer bap to a(k,l) quickly and add/insert value */
809           if (!sorted) low = 0; high = nrow;
810           while (high-low > 7) {
811             t = (low+high)/2;
812             if (rp[t] > bcol) high = t;
813             else              low  = t;
814           }
815           for (i=low; i<high; i++) {
816             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
817             if (rp[i] > bcol) break;
818             if (rp[i] == bcol) {
819               bap  = ap +  bs2*i + bs*cidx + ridx;
820               if (is == ADD_VALUES) *bap += value;
821               else                  *bap  = value;
822               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
823               if (brow == bcol && ridx < cidx){
824                 bap  = ap +  bs2*i + bs*ridx + cidx;
825                 if (is == ADD_VALUES) *bap += value;
826                 else                  *bap  = value;
827               }
828               goto noinsert1;
829             }
830           }
831 
832       if (nonew == 1) goto noinsert1;
833       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
834       if (nrow >= rmax) {
835         /* there is no extra room in row, therefore enlarge */
836         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
837         MatScalar *new_a;
838 
839         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
840 
841         /* Malloc new storage space */
842         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
843         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
844         new_j = (int*)(new_a + bs2*new_nz);
845         new_i = new_j + new_nz;
846 
847         /* copy over old data into new slots */
848         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
849         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
850         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
851         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
852         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
853         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
854         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
855         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
856         /* free up old matrix storage */
857         ierr = PetscFree(a->a);CHKERRQ(ierr);
858         if (!a->singlemalloc) {
859           ierr = PetscFree(a->i);CHKERRQ(ierr);
860           ierr = PetscFree(a->j);CHKERRQ(ierr);
861         }
862         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
863         a->singlemalloc = PETSC_TRUE;
864 
865         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
866         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
867         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
868         a->s_maxnz += bs2*CHUNKSIZE;
869         a->reallocs++;
870         a->s_nz++;
871       }
872 
873       N = nrow++ - 1;
874       /* shift up all the later entries in this row */
875       for (ii=N; ii>=i; ii--) {
876         rp[ii+1] = rp[ii];
877         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
878       }
879       if (N>=i) {
880         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
881       }
882       rp[i]                      = bcol;
883       ap[bs2*i + bs*cidx + ridx] = value;
884       noinsert1:;
885       low = i;
886       /* } */
887         }
888       } /* end of if .. if..  */
889     }                     /* end of loop over added columns */
890     ailen[brow] = nrow;
891   }                       /* end of loop over added rows */
892 
893   PetscFunctionReturn(0);
894 }
895 
896 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
897 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
898 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
899 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
900 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
901 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
902 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
903 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
904 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
905 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
906 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
907 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
908 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
909 extern int MatZeroEntries_SeqSBAIJ(Mat);
910 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
911 
912 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
913 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
914 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
915 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
916 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
917 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
918 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
919 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
920 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
921 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
922 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
923 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
924 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
925 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
926 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
927 
928 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
929 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
931 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
932 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
933 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
934 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
935 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
936 
937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
940 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
941 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
942 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
945 
946 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
947 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
948 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
949 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
950 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
951 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
952 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
953 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
954 
955 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
956 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
957 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
958 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
959 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
960 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
961 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
962 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
963 
964 #ifdef HAVE_MatICCFactor
965 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
966 #undef __FUNCT__
967 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
968 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
969 {
970   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
971   Mat         outA;
972   int         ierr;
973   PetscTruth  row_identity,col_identity;
974 
975   PetscFunctionBegin;
976   /*
977   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
978   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
979   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
980   if (!row_identity || !col_identity) {
981     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
982   }
983   */
984 
985   outA          = inA;
986   inA->factor   = FACTOR_CHOLESKY;
987 
988   if (!a->diag) {
989     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
990   }
991   /*
992       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
993       for ILU(0) factorization with natural ordering
994   */
995   switch (a->bs) {
996   case 1:
997     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
998     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
999     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1000   case 2:
1001     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1002     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1003     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1004     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1005     break;
1006   case 3:
1007     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1008     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1009     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1010     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1011     break;
1012   case 4:
1013     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1014     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1015     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1016     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1017     break;
1018   case 5:
1019     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1020     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1021     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1022     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1023     break;
1024   case 6:
1025     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1026     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1027     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1028     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1029     break;
1030   case 7:
1031     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1032     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1033     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1034     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1035     break;
1036   default:
1037     a->row        = row;
1038     a->icol       = col;
1039     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1040     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1041 
1042     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1043     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1044     PetscLogObjectParent(inA,a->icol);
1045 
1046     if (!a->solve_work) {
1047       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1048       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1049     }
1050   }
1051 
1052   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 #endif
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1060 int MatPrintHelp_SeqSBAIJ(Mat A)
1061 {
1062   static PetscTruth called = PETSC_FALSE;
1063   MPI_Comm          comm = A->comm;
1064   int               ierr;
1065 
1066   PetscFunctionBegin;
1067   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1068   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1069   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 EXTERN_C_BEGIN
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1076 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1077 {
1078   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1079   int         i,nz,n;
1080 
1081   PetscFunctionBegin;
1082   nz = baij->s_maxnz;
1083   n  = mat->n;
1084   for (i=0; i<nz; i++) {
1085     baij->j[i] = indices[i];
1086   }
1087   baij->s_nz = nz;
1088   for (i=0; i<n; i++) {
1089     baij->ilen[i] = baij->imax[i];
1090   }
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 EXTERN_C_END
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1098 /*@
1099     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1100        in the matrix.
1101 
1102   Input Parameters:
1103 +  mat     - the SeqSBAIJ matrix
1104 -  indices - the column indices
1105 
1106   Level: advanced
1107 
1108   Notes:
1109     This can be called if you have precomputed the nonzero structure of the
1110   matrix and want to provide it to the matrix object to improve the performance
1111   of the MatSetValues() operation.
1112 
1113     You MUST have set the correct numbers of nonzeros per row in the call to
1114   MatCreateSeqSBAIJ().
1115 
1116     MUST be called before any calls to MatSetValues()
1117 
1118 .seealso: MatCreateSeqSBAIJ
1119 @*/
1120 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1121 {
1122   int ierr,(*f)(Mat,int *);
1123 
1124   PetscFunctionBegin;
1125   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1126   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1127   if (f) {
1128     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1129   } else {
1130     SETERRQ(1,"Wrong type of matrix to set column indices");
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1137 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1138 {
1139   int        ierr;
1140 
1141   PetscFunctionBegin;
1142   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #include "petscblaslapack.h"
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1149 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1150 {
1151   int          ierr,one=1;
1152   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1153 
1154   PetscFunctionBegin;
1155   if (str == SAME_NONZERO_PATTERN) {
1156     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1157   } else {
1158     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1159   }
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 /* -------------------------------------------------------------------*/
1164 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1165        MatGetRow_SeqSBAIJ,
1166        MatRestoreRow_SeqSBAIJ,
1167        MatMult_SeqSBAIJ_N,
1168        MatMultAdd_SeqSBAIJ_N,
1169        MatMultTranspose_SeqSBAIJ,
1170        MatMultTransposeAdd_SeqSBAIJ,
1171        MatSolve_SeqSBAIJ_N,
1172        0,
1173        0,
1174        0,
1175        0,
1176        MatCholeskyFactor_SeqSBAIJ,
1177        MatRelax_SeqSBAIJ,
1178        MatTranspose_SeqSBAIJ,
1179        MatGetInfo_SeqSBAIJ,
1180        MatEqual_SeqSBAIJ,
1181        MatGetDiagonal_SeqSBAIJ,
1182        MatDiagonalScale_SeqSBAIJ,
1183        MatNorm_SeqSBAIJ,
1184        0,
1185        MatAssemblyEnd_SeqSBAIJ,
1186        0,
1187        MatSetOption_SeqSBAIJ,
1188        MatZeroEntries_SeqSBAIJ,
1189        MatZeroRows_SeqSBAIJ,
1190        0,
1191        0,
1192        MatCholeskyFactorSymbolic_SeqSBAIJ,
1193        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1194        MatSetUpPreallocation_SeqSBAIJ,
1195        0,
1196        MatICCFactorSymbolic_SeqSBAIJ,
1197        0,
1198        0,
1199        MatDuplicate_SeqSBAIJ,
1200        0,
1201        0,
1202        0,
1203        0,
1204        MatAXPY_SeqSBAIJ,
1205        MatGetSubMatrices_SeqSBAIJ,
1206        MatIncreaseOverlap_SeqSBAIJ,
1207        MatGetValues_SeqSBAIJ,
1208        0,
1209        MatPrintHelp_SeqSBAIJ,
1210        MatScale_SeqSBAIJ,
1211        0,
1212        0,
1213        0,
1214        MatGetBlockSize_SeqSBAIJ,
1215        MatGetRowIJ_SeqSBAIJ,
1216        MatRestoreRowIJ_SeqSBAIJ,
1217        0,
1218        0,
1219        0,
1220        0,
1221        0,
1222        0,
1223        MatSetValuesBlocked_SeqSBAIJ,
1224        MatGetSubMatrix_SeqSBAIJ,
1225        0,
1226        0,
1227        MatGetPetscMaps_Petsc,
1228        0,
1229        0,
1230        0,
1231        0,
1232        0,
1233        0,
1234        MatGetRowMax_SeqSBAIJ};
1235 
1236 EXTERN_C_BEGIN
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1239 int MatStoreValues_SeqSBAIJ(Mat mat)
1240 {
1241   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1242   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1243   int         ierr;
1244 
1245   PetscFunctionBegin;
1246   if (aij->nonew != 1) {
1247     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1248   }
1249 
1250   /* allocate space for values if not already there */
1251   if (!aij->saved_values) {
1252     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1253   }
1254 
1255   /* copy values over */
1256   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 EXTERN_C_END
1260 
1261 EXTERN_C_BEGIN
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1264 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1265 {
1266   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1267   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1268 
1269   PetscFunctionBegin;
1270   if (aij->nonew != 1) {
1271     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1272   }
1273   if (!aij->saved_values) {
1274     SETERRQ(1,"Must call MatStoreValues(A);first");
1275   }
1276 
1277   /* copy values over */
1278   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 EXTERN_C_END
1282 
1283 EXTERN_C_BEGIN
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1286 int MatCreate_SeqSBAIJ(Mat B)
1287 {
1288   Mat_SeqSBAIJ *b;
1289   int          ierr,size;
1290 
1291   PetscFunctionBegin;
1292   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1293   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1294   B->m = B->M = PetscMax(B->m,B->M);
1295   B->n = B->N = PetscMax(B->n,B->N);
1296 
1297   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1298   B->data = (void*)b;
1299   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1300   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1301   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1302   B->ops->view        = MatView_SeqSBAIJ;
1303   B->factor           = 0;
1304   B->lupivotthreshold = 1.0;
1305   B->mapping          = 0;
1306   b->row              = 0;
1307   b->icol             = 0;
1308   b->reallocs         = 0;
1309   b->saved_values     = 0;
1310 
1311   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1312   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1313 
1314   b->sorted           = PETSC_FALSE;
1315   b->roworiented      = PETSC_TRUE;
1316   b->nonew            = 0;
1317   b->diag             = 0;
1318   b->solve_work       = 0;
1319   b->mult_work        = 0;
1320   B->spptr            = 0;
1321   b->keepzeroedrows   = PETSC_FALSE;
1322 
1323   b->inew             = 0;
1324   b->jnew             = 0;
1325   b->anew             = 0;
1326   b->a2anew           = 0;
1327   b->permute          = PETSC_FALSE;
1328 
1329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1330                                      "MatStoreValues_SeqSBAIJ",
1331                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1333                                      "MatRetrieveValues_SeqSBAIJ",
1334                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1336                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1337                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 EXTERN_C_END
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1344 /*@C
1345    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1346    compressed row) format.  For good matrix assembly performance the
1347    user should preallocate the matrix storage by setting the parameter nz
1348    (or the array nnz).  By setting these parameters accurately, performance
1349    during matrix assembly can be increased by more than a factor of 50.
1350 
1351    Collective on Mat
1352 
1353    Input Parameters:
1354 +  A - the symmetric matrix
1355 .  bs - size of block
1356 .  nz - number of block nonzeros per block row (same for all rows)
1357 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1358          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1359 
1360    Options Database Keys:
1361 .   -mat_no_unroll - uses code that does not unroll the loops in the
1362                      block calculations (much slower)
1363 .    -mat_block_size - size of the blocks to use
1364 
1365    Level: intermediate
1366 
1367    Notes:
1368    Specify the preallocated storage with either nz or nnz (not both).
1369    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1370    allocation.  For additional details, see the users manual chapter on
1371    matrices.
1372 
1373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1374 @*/
1375 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1376 {
1377   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1378   int          i,len,ierr,mbs,bs2;
1379   PetscTruth   flg;
1380   int          s_nz;
1381 
1382   PetscFunctionBegin;
1383   B->preallocated = PETSC_TRUE;
1384   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1385   mbs  = B->m/bs;
1386   bs2  = bs*bs;
1387 
1388   if (mbs*bs != B->m) {
1389     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1390   }
1391 
1392   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1393   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1394   if (nnz) {
1395     for (i=0; i<mbs; i++) {
1396       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1397       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1398     }
1399   }
1400 
1401   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1402   if (!flg) {
1403     switch (bs) {
1404     case 1:
1405       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1406       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1407       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1408       B->ops->mult            = MatMult_SeqSBAIJ_1;
1409       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1410       break;
1411     case 2:
1412       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1413       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1414       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1415       B->ops->mult            = MatMult_SeqSBAIJ_2;
1416       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1417       break;
1418     case 3:
1419       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1420       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1421       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1422       B->ops->mult            = MatMult_SeqSBAIJ_3;
1423       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1424       break;
1425     case 4:
1426       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1427       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1428       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1429       B->ops->mult            = MatMult_SeqSBAIJ_4;
1430       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1431       break;
1432     case 5:
1433       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1434       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1435       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1436       B->ops->mult            = MatMult_SeqSBAIJ_5;
1437       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1438       break;
1439     case 6:
1440       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1441       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1442       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1443       B->ops->mult            = MatMult_SeqSBAIJ_6;
1444       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1445       break;
1446     case 7:
1447       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1448       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1449       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1450       B->ops->mult            = MatMult_SeqSBAIJ_7;
1451       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1452       break;
1453     }
1454   }
1455 
1456   b->mbs = mbs;
1457   b->nbs = mbs;
1458   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1459   if (!nnz) {
1460     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1461     else if (nz <= 0)        nz = 1;
1462     for (i=0; i<mbs; i++) {
1463       b->imax[i] = nz;
1464     }
1465     nz = nz*mbs; /* total nz */
1466   } else {
1467     nz = 0;
1468     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1469   }
1470   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1471   s_nz = nz;
1472 
1473   /* allocate the matrix space */
1474   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1475   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1476   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477   b->j = (int*)(b->a + s_nz*bs2);
1478   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1479   b->i = b->j + s_nz;
1480   b->singlemalloc = PETSC_TRUE;
1481 
1482   /* pointer to beginning of each row */
1483   b->i[0] = 0;
1484   for (i=1; i<mbs+1; i++) {
1485     b->i[i] = b->i[i-1] + b->imax[i-1];
1486   }
1487 
1488   /* b->ilen will count nonzeros in each block row so far. */
1489   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1490   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1491   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1492 
1493   b->bs               = bs;
1494   b->bs2              = bs2;
1495   b->s_nz             = 0;
1496   b->s_maxnz          = s_nz*bs2;
1497 
1498   b->inew             = 0;
1499   b->jnew             = 0;
1500   b->anew             = 0;
1501   b->a2anew           = 0;
1502   b->permute          = PETSC_FALSE;
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatCreateSeqSBAIJ"
1509 /*@C
1510    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1511    compressed row) format.  For good matrix assembly performance the
1512    user should preallocate the matrix storage by setting the parameter nz
1513    (or the array nnz).  By setting these parameters accurately, performance
1514    during matrix assembly can be increased by more than a factor of 50.
1515 
1516    Collective on MPI_Comm
1517 
1518    Input Parameters:
1519 +  comm - MPI communicator, set to PETSC_COMM_SELF
1520 .  bs - size of block
1521 .  m - number of rows, or number of columns
1522 .  nz - number of block nonzeros per block row (same for all rows)
1523 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1524          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1525 
1526    Output Parameter:
1527 .  A - the symmetric matrix
1528 
1529    Options Database Keys:
1530 .   -mat_no_unroll - uses code that does not unroll the loops in the
1531                      block calculations (much slower)
1532 .    -mat_block_size - size of the blocks to use
1533 
1534    Level: intermediate
1535 
1536    Notes:
1537 
1538    Specify the preallocated storage with either nz or nnz (not both).
1539    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1540    allocation.  For additional details, see the users manual chapter on
1541    matrices.
1542 
1543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1544 @*/
1545 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1546 {
1547   int ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1551   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1552   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1558 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1559 {
1560   Mat         C;
1561   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1562   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1563 
1564   PetscFunctionBegin;
1565   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1566 
1567   *B = 0;
1568   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1569   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1570   c    = (Mat_SeqSBAIJ*)C->data;
1571 
1572   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1573   C->preallocated   = PETSC_TRUE;
1574   C->factor         = A->factor;
1575   c->row            = 0;
1576   c->icol           = 0;
1577   c->saved_values   = 0;
1578   c->keepzeroedrows = a->keepzeroedrows;
1579   C->assembled      = PETSC_TRUE;
1580 
1581   c->bs         = a->bs;
1582   c->bs2        = a->bs2;
1583   c->mbs        = a->mbs;
1584   c->nbs        = a->nbs;
1585 
1586   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1587   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1588   for (i=0; i<mbs; i++) {
1589     c->imax[i] = a->imax[i];
1590     c->ilen[i] = a->ilen[i];
1591   }
1592 
1593   /* allocate the matrix space */
1594   c->singlemalloc = PETSC_TRUE;
1595   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1596   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1597   c->j = (int*)(c->a + nz*bs2);
1598   c->i = c->j + nz;
1599   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1600   if (mbs > 0) {
1601     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1602     if (cpvalues == MAT_COPY_VALUES) {
1603       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1604     } else {
1605       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1606     }
1607   }
1608 
1609   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1610   c->sorted      = a->sorted;
1611   c->roworiented = a->roworiented;
1612   c->nonew       = a->nonew;
1613 
1614   if (a->diag) {
1615     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1616     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1617     for (i=0; i<mbs; i++) {
1618       c->diag[i] = a->diag[i];
1619     }
1620   } else c->diag        = 0;
1621   c->s_nz               = a->s_nz;
1622   c->s_maxnz            = a->s_maxnz;
1623   c->solve_work         = 0;
1624   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1625   c->mult_work          = 0;
1626   *B = C;
1627   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 EXTERN_C_BEGIN
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1634 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1635 {
1636   Mat_SeqSBAIJ *a;
1637   Mat          B;
1638   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1639   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1640   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1641   int          *masked,nmask,tmp,bs2,ishift;
1642   PetscScalar  *aa;
1643   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1644 
1645   PetscFunctionBegin;
1646   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1647   bs2  = bs*bs;
1648 
1649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1650   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1651   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1652   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1653   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1654   M = header[1]; N = header[2]; nz = header[3];
1655 
1656   if (header[3] < 0) {
1657     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1658   }
1659 
1660   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1661 
1662   /*
1663      This code adds extra rows to make sure the number of rows is
1664     divisible by the blocksize
1665   */
1666   mbs        = M/bs;
1667   extra_rows = bs - M + bs*(mbs);
1668   if (extra_rows == bs) extra_rows = 0;
1669   else                  mbs++;
1670   if (extra_rows) {
1671     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1672   }
1673 
1674   /* read in row lengths */
1675   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1676   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1677   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1678 
1679   /* read in column indices */
1680   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1681   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1682   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1683 
1684   /* loop over row lengths determining block row lengths */
1685   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1686   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1687   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1688   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1689   masked   = mask + mbs;
1690   rowcount = 0; nzcount = 0;
1691   for (i=0; i<mbs; i++) {
1692     nmask = 0;
1693     for (j=0; j<bs; j++) {
1694       kmax = rowlengths[rowcount];
1695       for (k=0; k<kmax; k++) {
1696         tmp = jj[nzcount++]/bs;   /* block col. index */
1697         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1698       }
1699       rowcount++;
1700     }
1701     s_browlengths[i] += nmask;
1702 
1703     /* zero out the mask elements we set */
1704     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1705   }
1706 
1707   /* create our matrix */
1708   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1709   B = *A;
1710   a = (Mat_SeqSBAIJ*)B->data;
1711 
1712   /* set matrix "i" values */
1713   a->i[0] = 0;
1714   for (i=1; i<= mbs; i++) {
1715     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1716     a->ilen[i-1] = s_browlengths[i-1];
1717   }
1718   a->s_nz = a->i[mbs];
1719 
1720   /* read in nonzero values */
1721   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1722   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1723   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1724 
1725   /* set "a" and "j" values into matrix */
1726   nzcount = 0; jcount = 0;
1727   for (i=0; i<mbs; i++) {
1728     nzcountb = nzcount;
1729     nmask    = 0;
1730     for (j=0; j<bs; j++) {
1731       kmax = rowlengths[i*bs+j];
1732       for (k=0; k<kmax; k++) {
1733         tmp = jj[nzcount++]/bs; /* block col. index */
1734         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1735       }
1736     }
1737     /* sort the masked values */
1738     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1739 
1740     /* set "j" values into matrix */
1741     maskcount = 1;
1742     for (j=0; j<nmask; j++) {
1743       a->j[jcount++]  = masked[j];
1744       mask[masked[j]] = maskcount++;
1745     }
1746 
1747     /* set "a" values into matrix */
1748     ishift = bs2*a->i[i];
1749     for (j=0; j<bs; j++) {
1750       kmax = rowlengths[i*bs+j];
1751       for (k=0; k<kmax; k++) {
1752         tmp       = jj[nzcountb]/bs ; /* block col. index */
1753         if (tmp >= i){
1754           block     = mask[tmp] - 1;
1755           point     = jj[nzcountb] - bs*tmp;
1756           idx       = ishift + bs2*block + j + bs*point;
1757           a->a[idx] = aa[nzcountb];
1758         }
1759         nzcountb++;
1760       }
1761     }
1762     /* zero out the mask elements we set */
1763     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1764   }
1765   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1766 
1767   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1768   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1769   ierr = PetscFree(aa);CHKERRQ(ierr);
1770   ierr = PetscFree(jj);CHKERRQ(ierr);
1771   ierr = PetscFree(mask);CHKERRQ(ierr);
1772 
1773   B->assembled = PETSC_TRUE;
1774   ierr = MatView_Private(B);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 EXTERN_C_END
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1781 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1782 {
1783   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1784   MatScalar    *aa=a->a,*v,*v1;
1785   PetscScalar  *x,*b,*t,sum,d;
1786   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1787   int          nz,nz1,*vj,*vj1,i;
1788 
1789   PetscFunctionBegin;
1790   its = its*lits;
1791   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1792 
1793   if (bs > 1)
1794     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1795 
1796   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1797   if (xx != bb) {
1798     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1799   } else {
1800     b = x;
1801   }
1802 
1803   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1804 
1805   if (flag & SOR_ZERO_INITIAL_GUESS) {
1806     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1807       for (i=0; i<m; i++)
1808         t[i] = b[i];
1809 
1810       for (i=0; i<m; i++){
1811         d  = *(aa + ai[i]);  /* diag[i] */
1812         v  = aa + ai[i] + 1;
1813         vj = aj + ai[i] + 1;
1814         nz = ai[i+1] - ai[i] - 1;
1815         x[i] = omega*t[i]/d;
1816         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1817         PetscLogFlops(2*nz-1);
1818       }
1819     }
1820 
1821     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1822       for (i=0; i<m; i++)
1823         t[i] = b[i];
1824 
1825       for (i=0; i<m-1; i++){  /* update rhs */
1826         v  = aa + ai[i] + 1;
1827         vj = aj + ai[i] + 1;
1828         nz = ai[i+1] - ai[i] - 1;
1829         while (nz--) t[*vj++] -= x[i]*(*v++);
1830         PetscLogFlops(2*nz-1);
1831       }
1832       for (i=m-1; i>=0; i--){
1833         d  = *(aa + ai[i]);
1834         v  = aa + ai[i] + 1;
1835         vj = aj + ai[i] + 1;
1836         nz = ai[i+1] - ai[i] - 1;
1837         sum = t[i];
1838         while (nz--) sum -= x[*vj++]*(*v++);
1839         PetscLogFlops(2*nz-1);
1840         x[i] =   (1-omega)*x[i] + omega*sum/d;
1841       }
1842     }
1843     its--;
1844   }
1845 
1846   while (its--) {
1847     /*
1848        forward sweep:
1849        for i=0,...,m-1:
1850          sum[i] = (b[i] - U(i,:)x )/d[i];
1851          x[i]   = (1-omega)x[i] + omega*sum[i];
1852          b      = b - x[i]*U^T(i,:);
1853 
1854     */
1855     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1856       for (i=0; i<m; i++)
1857         t[i] = b[i];
1858 
1859       for (i=0; i<m; i++){
1860         d  = *(aa + ai[i]);  /* diag[i] */
1861         v  = aa + ai[i] + 1; v1=v;
1862         vj = aj + ai[i] + 1; vj1=vj;
1863         nz = ai[i+1] - ai[i] - 1; nz1=nz;
1864         sum = t[i];
1865         while (nz1--) sum -= (*v1++)*x[*vj1++];
1866         x[i] = (1-omega)*x[i] + omega*sum/d;
1867         while (nz--) t[*vj++] -= x[i]*(*v++);
1868         PetscLogFlops(4*nz-2);
1869       }
1870     }
1871 
1872   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1873       /*
1874        backward sweep:
1875        b = b - x[i]*U^T(i,:), i=0,...,n-2
1876        for i=m-1,...,0:
1877          sum[i] = (b[i] - U(i,:)x )/d[i];
1878          x[i]   = (1-omega)x[i] + omega*sum[i];
1879       */
1880       for (i=0; i<m; i++)
1881         t[i] = b[i];
1882 
1883       for (i=0; i<m-1; i++){  /* update rhs */
1884         v  = aa + ai[i] + 1;
1885         vj = aj + ai[i] + 1;
1886         nz = ai[i+1] - ai[i] - 1;
1887         while (nz--) t[*vj++] -= x[i]*(*v++);
1888         PetscLogFlops(2*nz-1);
1889       }
1890       for (i=m-1; i>=0; i--){
1891         d  = *(aa + ai[i]);
1892         v  = aa + ai[i] + 1;
1893         vj = aj + ai[i] + 1;
1894         nz = ai[i+1] - ai[i] - 1;
1895         sum = t[i];
1896         while (nz--) sum -= x[*vj++]*(*v++);
1897         PetscLogFlops(2*nz-1);
1898         x[i] =   (1-omega)*x[i] + omega*sum/d;
1899       }
1900     }
1901   }
1902 
1903   ierr = PetscFree(t); CHKERRQ(ierr);
1904   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1905   if (bb != xx) {
1906     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1907   }
1908 
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 
1913 
1914 
1915 
1916 
1917