xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 61860be5939b0caf981177eaa35e3e4ec5310d8f)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56 
57   PetscFunctionBegin;
58 
59   if (ia) {
60     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
61   }
62 
63 #ifdef NEW
64   int         ierr;
65 
66   if (!ia) PetscFunctionReturn(0);
67 
68 /*
69   if (symmetric) {
70     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
71   } else if (oshift == 1) {
72     int nz = a->i[n];
73     for (i=0; i<nz; i++) a->j[i]++;
74     for (i=0; i<n+1; i++) a->i[i]++;
75     *ia = a->i; *ja = a->j;
76   } else {
77 */
78     *ia = a->i; *ja = a->j;
79     /* } */
80 #endif
81   *nn = a->mbs;
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
88 {
89   PetscFunctionBegin;
90   if (!ia) PetscFunctionReturn(0);
91   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
92   /* PetscFunctionReturn(0); */
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
98 {
99   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
100 
101   PetscFunctionBegin;
102   *bs = sbaij->bs;
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 int MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
111   int         ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
116 #endif
117   ierr = PetscFree(a->a);CHKERRQ(ierr);
118   if (!a->singlemalloc) {
119     ierr = PetscFree(a->i);CHKERRQ(ierr);
120     ierr = PetscFree(a->j);CHKERRQ(ierr);
121   }
122   if (a->row) {
123     ierr = ISDestroy(a->row);CHKERRQ(ierr);
124   }
125   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
126   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
127   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
128   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
129   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
130   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
131   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
132 
133   if (a->inew){
134     ierr = PetscFree(a->inew);CHKERRQ(ierr);
135     a->inew = 0;
136   }
137   ierr = PetscFree(a);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
144 {
145   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
146 
147   PetscFunctionBegin;
148   switch (op) {
149   case MAT_ROW_ORIENTED:
150     a->roworiented    = PETSC_TRUE;
151     break;
152   case MAT_COLUMN_ORIENTED:
153     a->roworiented    = PETSC_FALSE;
154     break;
155   case MAT_COLUMNS_SORTED:
156     a->sorted         = PETSC_TRUE;
157     break;
158   case MAT_COLUMNS_UNSORTED:
159     a->sorted         = PETSC_FALSE;
160     break;
161   case MAT_KEEP_ZEROED_ROWS:
162     a->keepzeroedrows = PETSC_TRUE;
163     break;
164   case MAT_NO_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 1;
166     break;
167   case MAT_NEW_NONZERO_LOCATION_ERR:
168     a->nonew          = -1;
169     break;
170   case MAT_NEW_NONZERO_ALLOCATION_ERR:
171     a->nonew          = -2;
172     break;
173   case MAT_YES_NEW_NONZERO_LOCATIONS:
174     a->nonew          = 0;
175     break;
176   case MAT_ROWS_SORTED:
177   case MAT_ROWS_UNSORTED:
178   case MAT_YES_NEW_DIAGONALS:
179   case MAT_IGNORE_OFF_PROC_ENTRIES:
180   case MAT_USE_HASH_TABLE:
181   case MAT_USE_SINGLE_PRECISION_SOLVES:
182     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
183     break;
184   case MAT_NO_NEW_DIAGONALS:
185     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
197   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
198   MatScalar    *aa,*aa_i;
199   PetscScalar  *v_i;
200 
201   PetscFunctionBegin;
202   bs  = a->bs;
203   ai  = a->i;
204   aj  = a->j;
205   aa  = a->a;
206   bs2 = a->bs2;
207 
208   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
209 
210   bn  = row/bs;   /* Block number */
211   bp  = row % bs; /* Block position */
212   M   = ai[bn+1] - ai[bn];
213   *ncols = bs*M;
214 
215   if (v) {
216     *v = 0;
217     if (*ncols) {
218       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
219       for (i=0; i<M; i++) { /* for each block in the block row */
220         v_i  = *v + i*bs;
221         aa_i = aa + bs2*(ai[bn] + i);
222         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
223       }
224     }
225   }
226 
227   if (cols) {
228     *cols = 0;
229     if (*ncols) {
230       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
231       for (i=0; i<M; i++) { /* for each block in the block row */
232         cols_i = *cols + i*bs;
233         itmp  = bs*aj[ai[bn] + i];
234         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
235       }
236     }
237   }
238 
239   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
240   /* this segment is currently removed, so only entries in the upper triangle are obtained */
241 #ifdef column_search
242   v_i    = *v    + M*bs;
243   cols_i = *cols + M*bs;
244   for (i=0; i<bn; i++){ /* for each block row */
245     M = ai[i+1] - ai[i];
246     for (j=0; j<M; j++){
247       itmp = aj[ai[i] + j];    /* block column value */
248       if (itmp == bn){
249         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
250         for (k=0; k<bs; k++) {
251           *cols_i++ = i*bs+k;
252           *v_i++    = aa_i[k];
253         }
254         *ncols += bs;
255         break;
256       }
257     }
258   }
259 #endif
260 
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
267 {
268   int ierr;
269 
270   PetscFunctionBegin;
271   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
272   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
279 {
280   int ierr;
281   PetscFunctionBegin;
282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
289 {
290   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
291   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
292   PetscScalar  *aa;
293   FILE         *file;
294 
295   PetscFunctionBegin;
296   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
297   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
298   col_lens[0] = MAT_FILE_COOKIE;
299 
300   col_lens[1] = A->m;
301   col_lens[2] = A->m;
302   col_lens[3] = a->s_nz*bs2;
303 
304   /* store lengths of each row and write (including header) to file */
305   count = 0;
306   for (i=0; i<a->mbs; i++) {
307     for (j=0; j<bs; j++) {
308       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
309     }
310   }
311   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
312   ierr = PetscFree(col_lens);CHKERRQ(ierr);
313 
314   /* store column indices (zero start index) */
315   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
316   count = 0;
317   for (i=0; i<a->mbs; i++) {
318     for (j=0; j<bs; j++) {
319       for (k=a->i[i]; k<a->i[i+1]; k++) {
320         for (l=0; l<bs; l++) {
321           jj[count++] = bs*a->j[k] + l;
322         }
323       }
324     }
325   }
326   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
327   ierr = PetscFree(jj);CHKERRQ(ierr);
328 
329   /* store nonzero values */
330   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
331   count = 0;
332   for (i=0; i<a->mbs; i++) {
333     for (j=0; j<bs; j++) {
334       for (k=a->i[i]; k<a->i[i+1]; k++) {
335         for (l=0; l<bs; l++) {
336           aa[count++] = a->a[bs2*k + l*bs + j];
337         }
338       }
339     }
340   }
341   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
342   ierr = PetscFree(aa);CHKERRQ(ierr);
343 
344   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
345   if (file) {
346     fprintf(file,"-matload_block_size %d\n",a->bs);
347   }
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
354 {
355   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
356   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
357   char              *name;
358   PetscViewerFormat format;
359 
360   PetscFunctionBegin;
361   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
362   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
363   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
364     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
365   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
366     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
367   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
368     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
369     for (i=0; i<a->mbs; i++) {
370       for (j=0; j<bs; j++) {
371         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
372         for (k=a->i[i]; k<a->i[i+1]; k++) {
373           for (l=0; l<bs; l++) {
374 #if defined(PETSC_USE_COMPLEX)
375             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
376               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
377                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
379               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
380                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
381             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
383             }
384 #else
385             if (a->a[bs2*k + l*bs + j] != 0.0) {
386               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
387             }
388 #endif
389           }
390         }
391         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
392       }
393     }
394     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
395   } else {
396     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
397     for (i=0; i<a->mbs; i++) {
398       for (j=0; j<bs; j++) {
399         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
400         for (k=a->i[i]; k<a->i[i+1]; k++) {
401           for (l=0; l<bs; l++) {
402 #if defined(PETSC_USE_COMPLEX)
403             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
404               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
405                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
407               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
408                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
409             } else {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
411             }
412 #else
413             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
414 #endif
415           }
416         }
417         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
418       }
419     }
420     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
421   }
422   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
429 {
430   Mat          A = (Mat) Aa;
431   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
432   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
433   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
434   MatScalar    *aa;
435   MPI_Comm     comm;
436   PetscViewer  viewer;
437 
438   PetscFunctionBegin;
439   /*
440       This is nasty. If this is called from an originally parallel matrix
441    then all processes call this,but only the first has the matrix so the
442    rest should return immediately.
443   */
444   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
446   if (rank) PetscFunctionReturn(0);
447 
448   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
449 
450   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
451   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
452 
453   /* loop over matrix elements drawing boxes */
454   color = PETSC_DRAW_BLUE;
455   for (i=0,row=0; i<mbs; i++,row+=bs) {
456     for (j=a->i[i]; j<a->i[i+1]; j++) {
457       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
458       x_l = a->j[j]*bs; x_r = x_l + 1.0;
459       aa = a->a + j*bs2;
460       for (k=0; k<bs; k++) {
461         for (l=0; l<bs; l++) {
462           if (PetscRealPart(*aa++) >=  0.) continue;
463           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
464         }
465       }
466     }
467   }
468   color = PETSC_DRAW_CYAN;
469   for (i=0,row=0; i<mbs; i++,row+=bs) {
470     for (j=a->i[i]; j<a->i[i+1]; j++) {
471       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
472       x_l = a->j[j]*bs; x_r = x_l + 1.0;
473       aa = a->a + j*bs2;
474       for (k=0; k<bs; k++) {
475         for (l=0; l<bs; l++) {
476           if (PetscRealPart(*aa++) != 0.) continue;
477           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
478         }
479       }
480     }
481   }
482 
483   color = PETSC_DRAW_RED;
484   for (i=0,row=0; i<mbs; i++,row+=bs) {
485     for (j=a->i[i]; j<a->i[i+1]; j++) {
486       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
487       x_l = a->j[j]*bs; x_r = x_l + 1.0;
488       aa = a->a + j*bs2;
489       for (k=0; k<bs; k++) {
490         for (l=0; l<bs; l++) {
491           if (PetscRealPart(*aa++) <= 0.) continue;
492           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
493         }
494       }
495     }
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
503 {
504   int          ierr;
505   PetscReal    xl,yl,xr,yr,w,h;
506   PetscDraw    draw;
507   PetscTruth   isnull;
508 
509   PetscFunctionBegin;
510 
511   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
512   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
513 
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
515   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
516   xr += w;    yr += h;  xl = -w;     yl = -h;
517   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
518   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
519   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatView_SeqSBAIJ"
525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
526 {
527   int        ierr;
528   PetscTruth issocket,isascii,isbinary,isdraw;
529 
530   PetscFunctionBegin;
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
534   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
535   if (issocket) {
536     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
537   } else if (isascii){
538     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
539   } else if (isbinary) {
540     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
541   } else if (isdraw) {
542     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
543   } else {
544     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
545   }
546   PetscFunctionReturn(0);
547 }
548 
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
553 {
554   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
555   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
556   int        *ai = a->i,*ailen = a->ilen;
557   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
558   MatScalar  *ap,*aa = a->a,zero = 0.0;
559 
560   PetscFunctionBegin;
561   for (k=0; k<m; k++) { /* loop over rows */
562     row  = im[k]; brow = row/bs;
563     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
564     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
565     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
566     nrow = ailen[brow];
567     for (l=0; l<n; l++) { /* loop over columns */
568       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
569       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
570       col  = in[l] ;
571       bcol = col/bs;
572       cidx = col%bs;
573       ridx = row%bs;
574       high = nrow;
575       low  = 0; /* assume unsorted */
576       while (high-low > 5) {
577         t = (low+high)/2;
578         if (rp[t] > bcol) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > bcol) break;
583         if (rp[i] == bcol) {
584           *v++ = ap[bs2*i+bs*cidx+ridx];
585           goto finished;
586         }
587       }
588       *v++ = zero;
589       finished:;
590     }
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
599 {
600   PetscFunctionBegin;
601   SETERRQ(1,"Function not yet written for SBAIJ format");
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
606 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
607 {
608   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
609   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
610   int        m = A->m,*ip,N,*ailen = a->ilen;
611   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
612   MatScalar  *aa = a->a,*ap;
613 #if defined(PETSC_HAVE_SPOOLES)
614   PetscTruth   flag;
615 #endif
616 
617   PetscFunctionBegin;
618   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
619 
620   if (m) rmax = ailen[0];
621   for (i=1; i<mbs; i++) {
622     /* move each row back by the amount of empty slots (fshift) before it*/
623     fshift += imax[i-1] - ailen[i-1];
624     rmax   = PetscMax(rmax,ailen[i]);
625     if (fshift) {
626       ip = aj + ai[i]; ap = aa + bs2*ai[i];
627       N = ailen[i];
628       for (j=0; j<N; j++) {
629         ip[j-fshift] = ip[j];
630         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
631       }
632     }
633     ai[i] = ai[i-1] + ailen[i-1];
634   }
635   if (mbs) {
636     fshift += imax[mbs-1] - ailen[mbs-1];
637     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
638   }
639   /* reset ilen and imax for each row */
640   for (i=0; i<mbs; i++) {
641     ailen[i] = imax[i] = ai[i+1] - ai[i];
642   }
643   a->s_nz = ai[mbs];
644 
645   /* diagonals may have moved, reset it */
646   if (a->diag) {
647     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
648   }
649   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
650            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
651   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
652            a->reallocs);
653   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
654   a->reallocs          = 0;
655   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
656 
657 #if defined(PETSC_HAVE_SPOOLES)
658   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
659   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
660 #endif
661 
662   PetscFunctionReturn(0);
663 }
664 
665 /*
666    This function returns an array of flags which indicate the locations of contiguous
667    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
668    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
669    Assume: sizes should be long enough to hold all the values.
670 */
671 #undef __FUNCT__
672 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
673 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
674 {
675   int        i,j,k,row;
676   PetscTruth flg;
677 
678   PetscFunctionBegin;
679   for (i=0,j=0; i<n; j++) {
680     row = idx[i];
681     if (row%bs!=0) { /* Not the begining of a block */
682       sizes[j] = 1;
683       i++;
684     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
685       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
686       i++;
687     } else { /* Begining of the block, so check if the complete block exists */
688       flg = PETSC_TRUE;
689       for (k=1; k<bs; k++) {
690         if (row+k != idx[i+k]) { /* break in the block */
691           flg = PETSC_FALSE;
692           break;
693         }
694       }
695       if (flg == PETSC_TRUE) { /* No break in the bs */
696         sizes[j] = bs;
697         i+= bs;
698       } else {
699         sizes[j] = 1;
700         i++;
701       }
702     }
703   }
704   *bs_max = j;
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
710 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
711 {
712   Mat_SeqSBAIJ  *sbaij=(Mat_SeqSBAIJ*)A->data;
713   int           ierr,i,j,k,count,is_n,*is_idx,*rows;
714   int           bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
715   PetscScalar   zero = 0.0;
716   MatScalar     *aa;
717 
718   PetscFunctionBegin;
719   /* Make a copy of the IS and  sort it */
720   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
721   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
722 
723   /* allocate memory for rows,sizes */
724   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
725   sizes = rows + is_n;
726 
727   /* initialize copy IS values to rows, and sort them */
728   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
729   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
730   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
731     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
732     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
733   } else {
734     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
735   }
736   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
737 
738   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
739     row   = rows[j];                  /* row to be zeroed */
740     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
741     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
742     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
743     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
744       if (diag) {
745         if (sbaij->ilen[row/bs] > 0) {
746           sbaij->ilen[row/bs] = 1;
747           sbaij->j[sbaij->i[row/bs]] = row/bs;
748           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
749         }
750         /* Now insert all the diagoanl values for this bs */
751         for (k=0; k<bs; k++) {
752           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
753         }
754       } else { /* (!diag) */
755         sbaij->ilen[row/bs] = 0;
756       } /* end (!diag) */
757     } else { /* (sizes[i] != bs), broken block */
758 #if defined (PETSC_USE_DEBUG)
759       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
760 #endif
761       for (k=0; k<count; k++) {
762         aa[0] = zero;
763         aa+=bs;
764       }
765       if (diag) {
766         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
767       }
768     }
769   }
770 
771   ierr = PetscFree(rows);CHKERRQ(ierr);
772   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 /* Only add/insert a(i,j) with i<=j (blocks).
778    Any a(i,j) with i>j input by user is ingored.
779 */
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
783 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
784 {
785   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
786   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
787   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
788   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
789   int         ridx,cidx,bs2=a->bs2,ierr;
790   MatScalar   *ap,value,*aa=a->a,*bap;
791 
792   PetscFunctionBegin;
793 
794   for (k=0; k<m; k++) { /* loop over added rows */
795     row  = im[k];       /* row number */
796     brow = row/bs;      /* block row number */
797     if (row < 0) continue;
798 #if defined(PETSC_USE_BOPT_g)
799     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
800 #endif
801     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
802     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
803     rmax = imax[brow];  /* maximum space allocated for this row */
804     nrow = ailen[brow]; /* actual length of this row */
805     low  = 0;
806 
807     for (l=0; l<n; l++) { /* loop over added columns */
808       if (in[l] < 0) continue;
809 #if defined(PETSC_USE_BOPT_g)
810       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
811 #endif
812       col = in[l];
813       bcol = col/bs;              /* block col number */
814 
815       if (brow <= bcol){
816         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
817         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
818           /* element value a(k,l) */
819           if (roworiented) {
820             value = v[l + k*n];
821           } else {
822             value = v[k + l*m];
823           }
824 
825           /* move pointer bap to a(k,l) quickly and add/insert value */
826           if (!sorted) low = 0; high = nrow;
827           while (high-low > 7) {
828             t = (low+high)/2;
829             if (rp[t] > bcol) high = t;
830             else              low  = t;
831           }
832           for (i=low; i<high; i++) {
833             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
834             if (rp[i] > bcol) break;
835             if (rp[i] == bcol) {
836               bap  = ap +  bs2*i + bs*cidx + ridx;
837               if (is == ADD_VALUES) *bap += value;
838               else                  *bap  = value;
839               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
840               if (brow == bcol && ridx < cidx){
841                 bap  = ap +  bs2*i + bs*ridx + cidx;
842                 if (is == ADD_VALUES) *bap += value;
843                 else                  *bap  = value;
844               }
845               goto noinsert1;
846             }
847           }
848 
849       if (nonew == 1) goto noinsert1;
850       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
851       if (nrow >= rmax) {
852         /* there is no extra room in row, therefore enlarge */
853         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
854         MatScalar *new_a;
855 
856         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
857 
858         /* Malloc new storage space */
859         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
860         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
861         new_j = (int*)(new_a + bs2*new_nz);
862         new_i = new_j + new_nz;
863 
864         /* copy over old data into new slots */
865         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
866         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
867         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
868         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
869         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
870         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
871         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
872         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
873         /* free up old matrix storage */
874         ierr = PetscFree(a->a);CHKERRQ(ierr);
875         if (!a->singlemalloc) {
876           ierr = PetscFree(a->i);CHKERRQ(ierr);
877           ierr = PetscFree(a->j);CHKERRQ(ierr);
878         }
879         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
880         a->singlemalloc = PETSC_TRUE;
881 
882         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
883         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
884         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
885         a->s_maxnz += bs2*CHUNKSIZE;
886         a->reallocs++;
887         a->s_nz++;
888       }
889 
890       N = nrow++ - 1;
891       /* shift up all the later entries in this row */
892       for (ii=N; ii>=i; ii--) {
893         rp[ii+1] = rp[ii];
894         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
895       }
896       if (N>=i) {
897         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
898       }
899       rp[i]                      = bcol;
900       ap[bs2*i + bs*cidx + ridx] = value;
901       noinsert1:;
902       low = i;
903       /* } */
904         }
905       } /* end of if .. if..  */
906     }                     /* end of loop over added columns */
907     ailen[brow] = nrow;
908   }                       /* end of loop over added rows */
909 
910   PetscFunctionReturn(0);
911 }
912 
913 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
914 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
915 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
916 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
917 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
918 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
919 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
920 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
921 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
922 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
923 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
924 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
925 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
926 extern int MatZeroEntries_SeqSBAIJ(Mat);
927 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
928 
929 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
931 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
932 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
933 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
934 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
935 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
936 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
937 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
938 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
939 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
940 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
941 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
942 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
943 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
944 
945 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
951 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
952 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
953 
954 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
955 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
956 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
957 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
958 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
959 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
960 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
961 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
962 
963 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
964 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
965 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
966 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
967 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
968 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
969 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
970 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
971 
972 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
973 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
974 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
975 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
976 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
977 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
978 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
979 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
980 
981 #ifdef HAVE_MatICCFactor
982 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
983 #undef __FUNCT__
984 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
985 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
986 {
987   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
988   Mat         outA;
989   int         ierr;
990   PetscTruth  row_identity,col_identity;
991 
992   PetscFunctionBegin;
993   /*
994   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
995   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
996   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
997   if (!row_identity || !col_identity) {
998     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
999   }
1000   */
1001 
1002   outA          = inA;
1003   inA->factor   = FACTOR_CHOLESKY;
1004 
1005   if (!a->diag) {
1006     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1007   }
1008   /*
1009       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1010       for ILU(0) factorization with natural ordering
1011   */
1012   switch (a->bs) {
1013   case 1:
1014     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1015     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1016     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1017   case 2:
1018     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1019     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1020     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1021     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1022     break;
1023   case 3:
1024     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1025     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1026     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1027     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1028     break;
1029   case 4:
1030     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1031     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1032     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1033     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1034     break;
1035   case 5:
1036     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1037     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1038     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1039     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1040     break;
1041   case 6:
1042     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1043     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1044     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1045     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1046     break;
1047   case 7:
1048     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1049     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1050     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1051     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1052     break;
1053   default:
1054     a->row        = row;
1055     a->icol       = col;
1056     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1057     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1058 
1059     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1060     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1061     PetscLogObjectParent(inA,a->icol);
1062 
1063     if (!a->solve_work) {
1064       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1065       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1066     }
1067   }
1068 
1069   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1070 
1071   PetscFunctionReturn(0);
1072 }
1073 #endif
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1077 int MatPrintHelp_SeqSBAIJ(Mat A)
1078 {
1079   static PetscTruth called = PETSC_FALSE;
1080   MPI_Comm          comm = A->comm;
1081   int               ierr;
1082 
1083   PetscFunctionBegin;
1084   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1085   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1086   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 EXTERN_C_BEGIN
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1093 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1094 {
1095   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1096   int         i,nz,n;
1097 
1098   PetscFunctionBegin;
1099   nz = baij->s_maxnz;
1100   n  = mat->n;
1101   for (i=0; i<nz; i++) {
1102     baij->j[i] = indices[i];
1103   }
1104   baij->s_nz = nz;
1105   for (i=0; i<n; i++) {
1106     baij->ilen[i] = baij->imax[i];
1107   }
1108 
1109   PetscFunctionReturn(0);
1110 }
1111 EXTERN_C_END
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1115 /*@
1116     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1117        in the matrix.
1118 
1119   Input Parameters:
1120 +  mat     - the SeqSBAIJ matrix
1121 -  indices - the column indices
1122 
1123   Level: advanced
1124 
1125   Notes:
1126     This can be called if you have precomputed the nonzero structure of the
1127   matrix and want to provide it to the matrix object to improve the performance
1128   of the MatSetValues() operation.
1129 
1130     You MUST have set the correct numbers of nonzeros per row in the call to
1131   MatCreateSeqSBAIJ().
1132 
1133     MUST be called before any calls to MatSetValues()
1134 
1135 .seealso: MatCreateSeqSBAIJ
1136 @*/
1137 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1138 {
1139   int ierr,(*f)(Mat,int *);
1140 
1141   PetscFunctionBegin;
1142   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1143   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1144   if (f) {
1145     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1146   } else {
1147     SETERRQ(1,"Wrong type of matrix to set column indices");
1148   }
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1154 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1155 {
1156   int        ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #include "petscblaslapack.h"
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1166 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1167 {
1168   int          ierr,one=1;
1169   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1170 
1171   PetscFunctionBegin;
1172   if (str == SAME_NONZERO_PATTERN) {
1173     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1174   } else {
1175     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1176   }
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 /* -------------------------------------------------------------------*/
1181 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1182        MatGetRow_SeqSBAIJ,
1183        MatRestoreRow_SeqSBAIJ,
1184        MatMult_SeqSBAIJ_N,
1185        MatMultAdd_SeqSBAIJ_N,
1186        MatMultTranspose_SeqSBAIJ,
1187        MatMultTransposeAdd_SeqSBAIJ,
1188        MatSolve_SeqSBAIJ_N,
1189        0,
1190        0,
1191        0,
1192        0,
1193        MatCholeskyFactor_SeqSBAIJ,
1194        MatRelax_SeqSBAIJ,
1195        MatTranspose_SeqSBAIJ,
1196        MatGetInfo_SeqSBAIJ,
1197        MatEqual_SeqSBAIJ,
1198        MatGetDiagonal_SeqSBAIJ,
1199        MatDiagonalScale_SeqSBAIJ,
1200        MatNorm_SeqSBAIJ,
1201        0,
1202        MatAssemblyEnd_SeqSBAIJ,
1203        0,
1204        MatSetOption_SeqSBAIJ,
1205        MatZeroEntries_SeqSBAIJ,
1206        MatZeroRows_SeqSBAIJ,
1207        0,
1208        0,
1209        MatCholeskyFactorSymbolic_SeqSBAIJ,
1210        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1211        MatSetUpPreallocation_SeqSBAIJ,
1212        0,
1213        MatICCFactorSymbolic_SeqSBAIJ,
1214        0,
1215        0,
1216        MatDuplicate_SeqSBAIJ,
1217        0,
1218        0,
1219        0,
1220        0,
1221        MatAXPY_SeqSBAIJ,
1222        MatGetSubMatrices_SeqSBAIJ,
1223        MatIncreaseOverlap_SeqSBAIJ,
1224        MatGetValues_SeqSBAIJ,
1225        0,
1226        MatPrintHelp_SeqSBAIJ,
1227        MatScale_SeqSBAIJ,
1228        0,
1229        0,
1230        0,
1231        MatGetBlockSize_SeqSBAIJ,
1232        MatGetRowIJ_SeqSBAIJ,
1233        MatRestoreRowIJ_SeqSBAIJ,
1234        0,
1235        0,
1236        0,
1237        0,
1238        0,
1239        0,
1240        MatSetValuesBlocked_SeqSBAIJ,
1241        MatGetSubMatrix_SeqSBAIJ,
1242        0,
1243        0,
1244        MatGetPetscMaps_Petsc,
1245        0,
1246        0,
1247        0,
1248        0,
1249        0,
1250        0,
1251        MatGetRowMax_SeqSBAIJ};
1252 
1253 EXTERN_C_BEGIN
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1256 int MatStoreValues_SeqSBAIJ(Mat mat)
1257 {
1258   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1259   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1260   int         ierr;
1261 
1262   PetscFunctionBegin;
1263   if (aij->nonew != 1) {
1264     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1265   }
1266 
1267   /* allocate space for values if not already there */
1268   if (!aij->saved_values) {
1269     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1270   }
1271 
1272   /* copy values over */
1273   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 EXTERN_C_END
1277 
1278 EXTERN_C_BEGIN
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1281 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1282 {
1283   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1284   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1285 
1286   PetscFunctionBegin;
1287   if (aij->nonew != 1) {
1288     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1289   }
1290   if (!aij->saved_values) {
1291     SETERRQ(1,"Must call MatStoreValues(A);first");
1292   }
1293 
1294   /* copy values over */
1295   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 EXTERN_C_END
1299 
1300 EXTERN_C_BEGIN
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1303 int MatCreate_SeqSBAIJ(Mat B)
1304 {
1305   Mat_SeqSBAIJ *b;
1306   int          ierr,size;
1307 
1308   PetscFunctionBegin;
1309   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1310   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1311   B->m = B->M = PetscMax(B->m,B->M);
1312   B->n = B->N = PetscMax(B->n,B->N);
1313 
1314   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1315   B->data = (void*)b;
1316   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1317   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1318   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1319   B->ops->view        = MatView_SeqSBAIJ;
1320   B->factor           = 0;
1321   B->lupivotthreshold = 1.0;
1322   B->mapping          = 0;
1323   b->row              = 0;
1324   b->icol             = 0;
1325   b->reallocs         = 0;
1326   b->saved_values     = 0;
1327 
1328   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1329   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1330 
1331   b->sorted           = PETSC_FALSE;
1332   b->roworiented      = PETSC_TRUE;
1333   b->nonew            = 0;
1334   b->diag             = 0;
1335   b->solve_work       = 0;
1336   b->mult_work        = 0;
1337   B->spptr            = 0;
1338   b->keepzeroedrows   = PETSC_FALSE;
1339 
1340   b->inew             = 0;
1341   b->jnew             = 0;
1342   b->anew             = 0;
1343   b->a2anew           = 0;
1344   b->permute          = PETSC_FALSE;
1345 
1346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1347                                      "MatStoreValues_SeqSBAIJ",
1348                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1350                                      "MatRetrieveValues_SeqSBAIJ",
1351                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1353                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1354                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 EXTERN_C_END
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1361 /*@C
1362    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1363    compressed row) format.  For good matrix assembly performance the
1364    user should preallocate the matrix storage by setting the parameter nz
1365    (or the array nnz).  By setting these parameters accurately, performance
1366    during matrix assembly can be increased by more than a factor of 50.
1367 
1368    Collective on Mat
1369 
1370    Input Parameters:
1371 +  A - the symmetric matrix
1372 .  bs - size of block
1373 .  nz - number of block nonzeros per block row (same for all rows)
1374 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1375          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1376 
1377    Options Database Keys:
1378 .   -mat_no_unroll - uses code that does not unroll the loops in the
1379                      block calculations (much slower)
1380 .    -mat_block_size - size of the blocks to use
1381 
1382    Level: intermediate
1383 
1384    Notes:
1385    Specify the preallocated storage with either nz or nnz (not both).
1386    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1387    allocation.  For additional details, see the users manual chapter on
1388    matrices.
1389 
1390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1391 @*/
1392 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1393 {
1394   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1395   int          i,len,ierr,mbs,bs2;
1396   PetscTruth   flg;
1397   int          s_nz;
1398 
1399   PetscFunctionBegin;
1400   B->preallocated = PETSC_TRUE;
1401   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1402   mbs  = B->m/bs;
1403   bs2  = bs*bs;
1404 
1405   if (mbs*bs != B->m) {
1406     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1407   }
1408 
1409   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1410   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1411   if (nnz) {
1412     for (i=0; i<mbs; i++) {
1413       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1414       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1415     }
1416   }
1417 
1418   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1419   if (!flg) {
1420     switch (bs) {
1421     case 1:
1422       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1423       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1424       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1425       B->ops->mult            = MatMult_SeqSBAIJ_1;
1426       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1427       break;
1428     case 2:
1429       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1430       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1431       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1432       B->ops->mult            = MatMult_SeqSBAIJ_2;
1433       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1434       break;
1435     case 3:
1436       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1437       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1438       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1439       B->ops->mult            = MatMult_SeqSBAIJ_3;
1440       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1441       break;
1442     case 4:
1443       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1444       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1445       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1446       B->ops->mult            = MatMult_SeqSBAIJ_4;
1447       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1448       break;
1449     case 5:
1450       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1451       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1452       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1453       B->ops->mult            = MatMult_SeqSBAIJ_5;
1454       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1455       break;
1456     case 6:
1457       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1458       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1459       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1460       B->ops->mult            = MatMult_SeqSBAIJ_6;
1461       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1462       break;
1463     case 7:
1464       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1465       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1466       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1467       B->ops->mult            = MatMult_SeqSBAIJ_7;
1468       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1469       break;
1470     }
1471   }
1472 
1473   b->mbs = mbs;
1474   b->nbs = mbs;
1475   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1476   if (!nnz) {
1477     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1478     else if (nz <= 0)        nz = 1;
1479     for (i=0; i<mbs; i++) {
1480       b->imax[i] = nz;
1481     }
1482     nz = nz*mbs; /* total nz */
1483   } else {
1484     nz = 0;
1485     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1486   }
1487   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1488   s_nz = nz;
1489 
1490   /* allocate the matrix space */
1491   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1492   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1493   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1494   b->j = (int*)(b->a + s_nz*bs2);
1495   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1496   b->i = b->j + s_nz;
1497   b->singlemalloc = PETSC_TRUE;
1498 
1499   /* pointer to beginning of each row */
1500   b->i[0] = 0;
1501   for (i=1; i<mbs+1; i++) {
1502     b->i[i] = b->i[i-1] + b->imax[i-1];
1503   }
1504 
1505   /* b->ilen will count nonzeros in each block row so far. */
1506   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1507   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1508   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1509 
1510   b->bs               = bs;
1511   b->bs2              = bs2;
1512   b->s_nz             = 0;
1513   b->s_maxnz          = s_nz*bs2;
1514 
1515   b->inew             = 0;
1516   b->jnew             = 0;
1517   b->anew             = 0;
1518   b->a2anew           = 0;
1519   b->permute          = PETSC_FALSE;
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatCreateSeqSBAIJ"
1526 /*@C
1527    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1528    compressed row) format.  For good matrix assembly performance the
1529    user should preallocate the matrix storage by setting the parameter nz
1530    (or the array nnz).  By setting these parameters accurately, performance
1531    during matrix assembly can be increased by more than a factor of 50.
1532 
1533    Collective on MPI_Comm
1534 
1535    Input Parameters:
1536 +  comm - MPI communicator, set to PETSC_COMM_SELF
1537 .  bs - size of block
1538 .  m - number of rows, or number of columns
1539 .  nz - number of block nonzeros per block row (same for all rows)
1540 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1541          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1542 
1543    Output Parameter:
1544 .  A - the symmetric matrix
1545 
1546    Options Database Keys:
1547 .   -mat_no_unroll - uses code that does not unroll the loops in the
1548                      block calculations (much slower)
1549 .    -mat_block_size - size of the blocks to use
1550 
1551    Level: intermediate
1552 
1553    Notes:
1554 
1555    Specify the preallocated storage with either nz or nnz (not both).
1556    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1557    allocation.  For additional details, see the users manual chapter on
1558    matrices.
1559 
1560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1561 @*/
1562 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1563 {
1564   int ierr;
1565 
1566   PetscFunctionBegin;
1567   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1568   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1569   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1575 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1576 {
1577   Mat         C;
1578   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1579   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1580 
1581   PetscFunctionBegin;
1582   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1583 
1584   *B = 0;
1585   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1586   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1587   c    = (Mat_SeqSBAIJ*)C->data;
1588 
1589   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1590   C->preallocated   = PETSC_TRUE;
1591   C->factor         = A->factor;
1592   c->row            = 0;
1593   c->icol           = 0;
1594   c->saved_values   = 0;
1595   c->keepzeroedrows = a->keepzeroedrows;
1596   C->assembled      = PETSC_TRUE;
1597 
1598   c->bs         = a->bs;
1599   c->bs2        = a->bs2;
1600   c->mbs        = a->mbs;
1601   c->nbs        = a->nbs;
1602 
1603   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1604   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1605   for (i=0; i<mbs; i++) {
1606     c->imax[i] = a->imax[i];
1607     c->ilen[i] = a->ilen[i];
1608   }
1609 
1610   /* allocate the matrix space */
1611   c->singlemalloc = PETSC_TRUE;
1612   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1613   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1614   c->j = (int*)(c->a + nz*bs2);
1615   c->i = c->j + nz;
1616   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1617   if (mbs > 0) {
1618     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1619     if (cpvalues == MAT_COPY_VALUES) {
1620       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1621     } else {
1622       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1623     }
1624   }
1625 
1626   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1627   c->sorted      = a->sorted;
1628   c->roworiented = a->roworiented;
1629   c->nonew       = a->nonew;
1630 
1631   if (a->diag) {
1632     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1633     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1634     for (i=0; i<mbs; i++) {
1635       c->diag[i] = a->diag[i];
1636     }
1637   } else c->diag        = 0;
1638   c->s_nz               = a->s_nz;
1639   c->s_maxnz            = a->s_maxnz;
1640   c->solve_work         = 0;
1641   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1642   c->mult_work          = 0;
1643   *B = C;
1644   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 EXTERN_C_BEGIN
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1651 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1652 {
1653   Mat_SeqSBAIJ *a;
1654   Mat          B;
1655   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1656   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1657   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1658   int          *masked,nmask,tmp,bs2,ishift;
1659   PetscScalar  *aa;
1660   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1661 
1662   PetscFunctionBegin;
1663   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1664   bs2  = bs*bs;
1665 
1666   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1667   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1668   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1669   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1670   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1671   M = header[1]; N = header[2]; nz = header[3];
1672 
1673   if (header[3] < 0) {
1674     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1675   }
1676 
1677   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1678 
1679   /*
1680      This code adds extra rows to make sure the number of rows is
1681     divisible by the blocksize
1682   */
1683   mbs        = M/bs;
1684   extra_rows = bs - M + bs*(mbs);
1685   if (extra_rows == bs) extra_rows = 0;
1686   else                  mbs++;
1687   if (extra_rows) {
1688     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1689   }
1690 
1691   /* read in row lengths */
1692   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1693   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1694   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1695 
1696   /* read in column indices */
1697   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1698   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1699   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1700 
1701   /* loop over row lengths determining block row lengths */
1702   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1703   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1704   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1705   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1706   masked   = mask + mbs;
1707   rowcount = 0; nzcount = 0;
1708   for (i=0; i<mbs; i++) {
1709     nmask = 0;
1710     for (j=0; j<bs; j++) {
1711       kmax = rowlengths[rowcount];
1712       for (k=0; k<kmax; k++) {
1713         tmp = jj[nzcount++]/bs;   /* block col. index */
1714         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1715       }
1716       rowcount++;
1717     }
1718     s_browlengths[i] += nmask;
1719 
1720     /* zero out the mask elements we set */
1721     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1722   }
1723 
1724   /* create our matrix */
1725   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1726   B = *A;
1727   a = (Mat_SeqSBAIJ*)B->data;
1728 
1729   /* set matrix "i" values */
1730   a->i[0] = 0;
1731   for (i=1; i<= mbs; i++) {
1732     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1733     a->ilen[i-1] = s_browlengths[i-1];
1734   }
1735   a->s_nz = a->i[mbs];
1736 
1737   /* read in nonzero values */
1738   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1739   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1740   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1741 
1742   /* set "a" and "j" values into matrix */
1743   nzcount = 0; jcount = 0;
1744   for (i=0; i<mbs; i++) {
1745     nzcountb = nzcount;
1746     nmask    = 0;
1747     for (j=0; j<bs; j++) {
1748       kmax = rowlengths[i*bs+j];
1749       for (k=0; k<kmax; k++) {
1750         tmp = jj[nzcount++]/bs; /* block col. index */
1751         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1752       }
1753     }
1754     /* sort the masked values */
1755     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1756 
1757     /* set "j" values into matrix */
1758     maskcount = 1;
1759     for (j=0; j<nmask; j++) {
1760       a->j[jcount++]  = masked[j];
1761       mask[masked[j]] = maskcount++;
1762     }
1763 
1764     /* set "a" values into matrix */
1765     ishift = bs2*a->i[i];
1766     for (j=0; j<bs; j++) {
1767       kmax = rowlengths[i*bs+j];
1768       for (k=0; k<kmax; k++) {
1769         tmp       = jj[nzcountb]/bs ; /* block col. index */
1770         if (tmp >= i){
1771           block     = mask[tmp] - 1;
1772           point     = jj[nzcountb] - bs*tmp;
1773           idx       = ishift + bs2*block + j + bs*point;
1774           a->a[idx] = aa[nzcountb];
1775         }
1776         nzcountb++;
1777       }
1778     }
1779     /* zero out the mask elements we set */
1780     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1781   }
1782   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1783 
1784   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1785   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1786   ierr = PetscFree(aa);CHKERRQ(ierr);
1787   ierr = PetscFree(jj);CHKERRQ(ierr);
1788   ierr = PetscFree(mask);CHKERRQ(ierr);
1789 
1790   B->assembled = PETSC_TRUE;
1791   ierr = MatView_Private(B);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 EXTERN_C_END
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1798 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1799 {
1800   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1801   MatScalar    *aa=a->a,*v,*v1;
1802   PetscScalar  *x,*b,*t,sum,d;
1803   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1804   int          nz,nz1,*vj,*vj1,i;
1805 
1806   PetscFunctionBegin;
1807   its = its*lits;
1808   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1809 
1810   if (bs > 1)
1811     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1812 
1813   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1814   if (xx != bb) {
1815     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1816   } else {
1817     b = x;
1818   }
1819 
1820   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1821 
1822   if (flag & SOR_ZERO_INITIAL_GUESS) {
1823     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1824       for (i=0; i<m; i++)
1825         t[i] = b[i];
1826 
1827       for (i=0; i<m; i++){
1828         d  = *(aa + ai[i]);  /* diag[i] */
1829         v  = aa + ai[i] + 1;
1830         vj = aj + ai[i] + 1;
1831         nz = ai[i+1] - ai[i] - 1;
1832         x[i] = omega*t[i]/d;
1833         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1834         PetscLogFlops(2*nz-1);
1835       }
1836     }
1837 
1838     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1839       for (i=0; i<m; i++)
1840         t[i] = b[i];
1841 
1842       for (i=0; i<m-1; i++){  /* update rhs */
1843         v  = aa + ai[i] + 1;
1844         vj = aj + ai[i] + 1;
1845         nz = ai[i+1] - ai[i] - 1;
1846         while (nz--) t[*vj++] -= x[i]*(*v++);
1847         PetscLogFlops(2*nz-1);
1848       }
1849       for (i=m-1; i>=0; i--){
1850         d  = *(aa + ai[i]);
1851         v  = aa + ai[i] + 1;
1852         vj = aj + ai[i] + 1;
1853         nz = ai[i+1] - ai[i] - 1;
1854         sum = t[i];
1855         while (nz--) sum -= x[*vj++]*(*v++);
1856         PetscLogFlops(2*nz-1);
1857         x[i] =   (1-omega)*x[i] + omega*sum/d;
1858       }
1859     }
1860     its--;
1861   }
1862 
1863   while (its--) {
1864     /*
1865        forward sweep:
1866        for i=0,...,m-1:
1867          sum[i] = (b[i] - U(i,:)x )/d[i];
1868          x[i]   = (1-omega)x[i] + omega*sum[i];
1869          b      = b - x[i]*U^T(i,:);
1870 
1871     */
1872     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1873       for (i=0; i<m; i++)
1874         t[i] = b[i];
1875 
1876       for (i=0; i<m; i++){
1877         d  = *(aa + ai[i]);  /* diag[i] */
1878         v  = aa + ai[i] + 1; v1=v;
1879         vj = aj + ai[i] + 1; vj1=vj;
1880         nz = ai[i+1] - ai[i] - 1; nz1=nz;
1881         sum = t[i];
1882         while (nz1--) sum -= (*v1++)*x[*vj1++];
1883         x[i] = (1-omega)*x[i] + omega*sum/d;
1884         while (nz--) t[*vj++] -= x[i]*(*v++);
1885         PetscLogFlops(4*nz-2);
1886       }
1887     }
1888 
1889   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1890       /*
1891        backward sweep:
1892        b = b - x[i]*U^T(i,:), i=0,...,n-2
1893        for i=m-1,...,0:
1894          sum[i] = (b[i] - U(i,:)x )/d[i];
1895          x[i]   = (1-omega)x[i] + omega*sum[i];
1896       */
1897       for (i=0; i<m; i++)
1898         t[i] = b[i];
1899 
1900       for (i=0; i<m-1; i++){  /* update rhs */
1901         v  = aa + ai[i] + 1;
1902         vj = aj + ai[i] + 1;
1903         nz = ai[i+1] - ai[i] - 1;
1904         while (nz--) t[*vj++] -= x[i]*(*v++);
1905         PetscLogFlops(2*nz-1);
1906       }
1907       for (i=m-1; i>=0; i--){
1908         d  = *(aa + ai[i]);
1909         v  = aa + ai[i] + 1;
1910         vj = aj + ai[i] + 1;
1911         nz = ai[i+1] - ai[i] - 1;
1912         sum = t[i];
1913         while (nz--) sum -= x[*vj++]*(*v++);
1914         PetscLogFlops(2*nz-1);
1915         x[i] =   (1-omega)*x[i] + omega*sum/d;
1916       }
1917     }
1918   }
1919 
1920   ierr = PetscFree(t); CHKERRQ(ierr);
1921   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1922   if (bb != xx) {
1923     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1924   }
1925 
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 
1930 
1931 
1932 
1933 
1934