xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 0b498fdb2a73e86aac2706d824139bbc5f26bf18)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56 
57   PetscFunctionBegin;
58 
59   if (ia) {
60     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
61   }
62 
63 #ifdef NEW
64   int         ierr;
65 
66   if (!ia) PetscFunctionReturn(0);
67 
68 /*
69   if (symmetric) {
70     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
71   } else if (oshift == 1) {
72     int nz = a->i[n];
73     for (i=0; i<nz; i++) a->j[i]++;
74     for (i=0; i<n+1; i++) a->i[i]++;
75     *ia = a->i; *ja = a->j;
76   } else {
77 */
78     *ia = a->i; *ja = a->j;
79     /* } */
80 #endif
81   *nn = a->mbs;
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
88 {
89   PetscFunctionBegin;
90   if (!ia) PetscFunctionReturn(0);
91   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
92   /* PetscFunctionReturn(0); */
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
98 {
99   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
100 
101   PetscFunctionBegin;
102   *bs = sbaij->bs;
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 int MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
111   int         ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
116 #endif
117   ierr = PetscFree(a->a);CHKERRQ(ierr);
118   if (!a->singlemalloc) {
119     ierr = PetscFree(a->i);CHKERRQ(ierr);
120     ierr = PetscFree(a->j);CHKERRQ(ierr);
121   }
122   if (a->row) {
123     ierr = ISDestroy(a->row);CHKERRQ(ierr);
124   }
125   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
126   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
127   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
128   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
129   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
130   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
131   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
132 
133   if (a->inew){
134     ierr = PetscFree(a->inew);CHKERRQ(ierr);
135     a->inew = 0;
136   }
137   ierr = PetscFree(a);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
144 {
145   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
146 
147   PetscFunctionBegin;
148   switch (op) {
149   case MAT_ROW_ORIENTED:
150     a->roworiented    = PETSC_TRUE;
151     break;
152   case MAT_COLUMN_ORIENTED:
153     a->roworiented    = PETSC_FALSE;
154     break;
155   case MAT_COLUMNS_SORTED:
156     a->sorted         = PETSC_TRUE;
157     break;
158   case MAT_COLUMNS_UNSORTED:
159     a->sorted         = PETSC_FALSE;
160     break;
161   case MAT_KEEP_ZEROED_ROWS:
162     a->keepzeroedrows = PETSC_TRUE;
163     break;
164   case MAT_NO_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 1;
166     break;
167   case MAT_NEW_NONZERO_LOCATION_ERR:
168     a->nonew          = -1;
169     break;
170   case MAT_NEW_NONZERO_ALLOCATION_ERR:
171     a->nonew          = -2;
172     break;
173   case MAT_YES_NEW_NONZERO_LOCATIONS:
174     a->nonew          = 0;
175     break;
176   case MAT_ROWS_SORTED:
177   case MAT_ROWS_UNSORTED:
178   case MAT_YES_NEW_DIAGONALS:
179   case MAT_IGNORE_OFF_PROC_ENTRIES:
180   case MAT_USE_HASH_TABLE:
181   case MAT_USE_SINGLE_PRECISION_SOLVES:
182     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
183     break;
184   case MAT_NO_NEW_DIAGONALS:
185     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
197   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
198   MatScalar    *aa,*aa_i;
199   PetscScalar  *v_i;
200 
201   PetscFunctionBegin;
202   bs  = a->bs;
203   ai  = a->i;
204   aj  = a->j;
205   aa  = a->a;
206   bs2 = a->bs2;
207 
208   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
209 
210   bn  = row/bs;   /* Block number */
211   bp  = row % bs; /* Block position */
212   M   = ai[bn+1] - ai[bn];
213   *ncols = bs*M;
214 
215   if (v) {
216     *v = 0;
217     if (*ncols) {
218       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
219       for (i=0; i<M; i++) { /* for each block in the block row */
220         v_i  = *v + i*bs;
221         aa_i = aa + bs2*(ai[bn] + i);
222         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
223       }
224     }
225   }
226 
227   if (cols) {
228     *cols = 0;
229     if (*ncols) {
230       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
231       for (i=0; i<M; i++) { /* for each block in the block row */
232         cols_i = *cols + i*bs;
233         itmp  = bs*aj[ai[bn] + i];
234         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
235       }
236     }
237   }
238 
239   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
240   /* this segment is currently removed, so only entries in the upper triangle are obtained */
241 #ifdef column_search
242   v_i    = *v    + M*bs;
243   cols_i = *cols + M*bs;
244   for (i=0; i<bn; i++){ /* for each block row */
245     M = ai[i+1] - ai[i];
246     for (j=0; j<M; j++){
247       itmp = aj[ai[i] + j];    /* block column value */
248       if (itmp == bn){
249         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
250         for (k=0; k<bs; k++) {
251           *cols_i++ = i*bs+k;
252           *v_i++    = aa_i[k];
253         }
254         *ncols += bs;
255         break;
256       }
257     }
258   }
259 #endif
260 
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
267 {
268   int ierr;
269 
270   PetscFunctionBegin;
271   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
272   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
279 {
280   int ierr;
281   PetscFunctionBegin;
282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
289 {
290   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
291   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
292   PetscScalar  *aa;
293   FILE         *file;
294 
295   PetscFunctionBegin;
296   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
297   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
298   col_lens[0] = MAT_FILE_COOKIE;
299 
300   col_lens[1] = A->m;
301   col_lens[2] = A->m;
302   col_lens[3] = a->s_nz*bs2;
303 
304   /* store lengths of each row and write (including header) to file */
305   count = 0;
306   for (i=0; i<a->mbs; i++) {
307     for (j=0; j<bs; j++) {
308       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
309     }
310   }
311   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
312   ierr = PetscFree(col_lens);CHKERRQ(ierr);
313 
314   /* store column indices (zero start index) */
315   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
316   count = 0;
317   for (i=0; i<a->mbs; i++) {
318     for (j=0; j<bs; j++) {
319       for (k=a->i[i]; k<a->i[i+1]; k++) {
320         for (l=0; l<bs; l++) {
321           jj[count++] = bs*a->j[k] + l;
322         }
323       }
324     }
325   }
326   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
327   ierr = PetscFree(jj);CHKERRQ(ierr);
328 
329   /* store nonzero values */
330   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
331   count = 0;
332   for (i=0; i<a->mbs; i++) {
333     for (j=0; j<bs; j++) {
334       for (k=a->i[i]; k<a->i[i+1]; k++) {
335         for (l=0; l<bs; l++) {
336           aa[count++] = a->a[bs2*k + l*bs + j];
337         }
338       }
339     }
340   }
341   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
342   ierr = PetscFree(aa);CHKERRQ(ierr);
343 
344   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
345   if (file) {
346     fprintf(file,"-matload_block_size %d\n",a->bs);
347   }
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
354 {
355   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
356   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
357   char              *name;
358   PetscViewerFormat format;
359 
360   PetscFunctionBegin;
361   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
362   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
363   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
364     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
365   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
366     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
367   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
368     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
369     for (i=0; i<a->mbs; i++) {
370       for (j=0; j<bs; j++) {
371         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
372         for (k=a->i[i]; k<a->i[i+1]; k++) {
373           for (l=0; l<bs; l++) {
374 #if defined(PETSC_USE_COMPLEX)
375             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
376               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
377                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
379               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
380                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
381             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
383             }
384 #else
385             if (a->a[bs2*k + l*bs + j] != 0.0) {
386               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
387             }
388 #endif
389           }
390         }
391         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
392       }
393     }
394     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
395   } else {
396     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
397     for (i=0; i<a->mbs; i++) {
398       for (j=0; j<bs; j++) {
399         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
400         for (k=a->i[i]; k<a->i[i+1]; k++) {
401           for (l=0; l<bs; l++) {
402 #if defined(PETSC_USE_COMPLEX)
403             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
404               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
405                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
407               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
408                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
409             } else {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
411             }
412 #else
413             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
414 #endif
415           }
416         }
417         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
418       }
419     }
420     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
421   }
422   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
429 {
430   Mat          A = (Mat) Aa;
431   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
432   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
433   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
434   MatScalar    *aa;
435   MPI_Comm     comm;
436   PetscViewer  viewer;
437 
438   PetscFunctionBegin;
439   /*
440       This is nasty. If this is called from an originally parallel matrix
441    then all processes call this,but only the first has the matrix so the
442    rest should return immediately.
443   */
444   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
446   if (rank) PetscFunctionReturn(0);
447 
448   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
449 
450   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
451   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
452 
453   /* loop over matrix elements drawing boxes */
454   color = PETSC_DRAW_BLUE;
455   for (i=0,row=0; i<mbs; i++,row+=bs) {
456     for (j=a->i[i]; j<a->i[i+1]; j++) {
457       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
458       x_l = a->j[j]*bs; x_r = x_l + 1.0;
459       aa = a->a + j*bs2;
460       for (k=0; k<bs; k++) {
461         for (l=0; l<bs; l++) {
462           if (PetscRealPart(*aa++) >=  0.) continue;
463           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
464         }
465       }
466     }
467   }
468   color = PETSC_DRAW_CYAN;
469   for (i=0,row=0; i<mbs; i++,row+=bs) {
470     for (j=a->i[i]; j<a->i[i+1]; j++) {
471       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
472       x_l = a->j[j]*bs; x_r = x_l + 1.0;
473       aa = a->a + j*bs2;
474       for (k=0; k<bs; k++) {
475         for (l=0; l<bs; l++) {
476           if (PetscRealPart(*aa++) != 0.) continue;
477           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
478         }
479       }
480     }
481   }
482 
483   color = PETSC_DRAW_RED;
484   for (i=0,row=0; i<mbs; i++,row+=bs) {
485     for (j=a->i[i]; j<a->i[i+1]; j++) {
486       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
487       x_l = a->j[j]*bs; x_r = x_l + 1.0;
488       aa = a->a + j*bs2;
489       for (k=0; k<bs; k++) {
490         for (l=0; l<bs; l++) {
491           if (PetscRealPart(*aa++) <= 0.) continue;
492           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
493         }
494       }
495     }
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
503 {
504   int          ierr;
505   PetscReal    xl,yl,xr,yr,w,h;
506   PetscDraw    draw;
507   PetscTruth   isnull;
508 
509   PetscFunctionBegin;
510 
511   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
512   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
513 
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
515   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
516   xr += w;    yr += h;  xl = -w;     yl = -h;
517   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
518   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
519   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatView_SeqSBAIJ"
525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
526 {
527   int        ierr;
528   PetscTruth issocket,isascii,isbinary,isdraw;
529 
530   PetscFunctionBegin;
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
534   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
535   if (issocket) {
536     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
537   } else if (isascii){
538     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
539   } else if (isbinary) {
540     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
541   } else if (isdraw) {
542     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
543   } else {
544     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
545   }
546   PetscFunctionReturn(0);
547 }
548 
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
553 {
554   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
555   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
556   int        *ai = a->i,*ailen = a->ilen;
557   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
558   MatScalar  *ap,*aa = a->a,zero = 0.0;
559 
560   PetscFunctionBegin;
561   for (k=0; k<m; k++) { /* loop over rows */
562     row  = im[k]; brow = row/bs;
563     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
564     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
565     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
566     nrow = ailen[brow];
567     for (l=0; l<n; l++) { /* loop over columns */
568       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
569       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
570       col  = in[l] ;
571       bcol = col/bs;
572       cidx = col%bs;
573       ridx = row%bs;
574       high = nrow;
575       low  = 0; /* assume unsorted */
576       while (high-low > 5) {
577         t = (low+high)/2;
578         if (rp[t] > bcol) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > bcol) break;
583         if (rp[i] == bcol) {
584           *v++ = ap[bs2*i+bs*cidx+ridx];
585           goto finished;
586         }
587       }
588       *v++ = zero;
589       finished:;
590     }
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
599 {
600   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
601   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
602   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
603   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
604   PetscTruth  roworiented=a->roworiented;
605   MatScalar   *value = v,*ap,*aa = a->a,*bap;
606 
607   PetscFunctionBegin;
608   if (roworiented) {
609     stepval = (n-1)*bs;
610   } else {
611     stepval = (m-1)*bs;
612   }
613   for (k=0; k<m; k++) { /* loop over added rows */
614     row  = im[k];
615     if (row < 0) continue;
616 #if defined(PETSC_USE_BOPT_g)
617     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
618 #endif
619     rp   = aj + ai[row];
620     ap   = aa + bs2*ai[row];
621     rmax = imax[row];
622     nrow = ailen[row];
623     low  = 0;
624     for (l=0; l<n; l++) { /* loop over added columns */
625       if (in[l] < 0) continue;
626 #if defined(PETSC_USE_BOPT_g)
627       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
628 #endif
629       col = in[l];
630       if (col < row) continue; /* ignore lower triangular block */
631       if (roworiented) {
632         value = v + k*(stepval+bs)*bs + l*bs;
633       } else {
634         value = v + l*(stepval+bs)*bs + k*bs;
635       }
636       if (!sorted) low = 0; high = nrow;
637       while (high-low > 7) {
638         t = (low+high)/2;
639         if (rp[t] > col) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > col) break;
644         if (rp[i] == col) {
645           bap  = ap +  bs2*i;
646           if (roworiented) {
647             if (is == ADD_VALUES) {
648               for (ii=0; ii<bs; ii++,value+=stepval) {
649                 for (jj=ii; jj<bs2; jj+=bs) {
650                   bap[jj] += *value++;
651                 }
652               }
653             } else {
654               for (ii=0; ii<bs; ii++,value+=stepval) {
655                 for (jj=ii; jj<bs2; jj+=bs) {
656                   bap[jj] = *value++;
657                 }
658               }
659             }
660           } else {
661             if (is == ADD_VALUES) {
662               for (ii=0; ii<bs; ii++,value+=stepval) {
663                 for (jj=0; jj<bs; jj++) {
664                   *bap++ += *value++;
665                 }
666               }
667             } else {
668               for (ii=0; ii<bs; ii++,value+=stepval) {
669                 for (jj=0; jj<bs; jj++) {
670                   *bap++  = *value++;
671                 }
672               }
673             }
674           }
675           goto noinsert2;
676         }
677       }
678       if (nonew == 1) goto noinsert2;
679       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
680       if (nrow >= rmax) {
681         /* there is no extra room in row, therefore enlarge */
682         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
683         MatScalar *new_a;
684 
685         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
686 
687         /* malloc new storage space */
688         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
689 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
690         new_j   = (int*)(new_a + bs2*new_nz);
691         new_i   = new_j + new_nz;
692 
693         /* copy over old data into new slots */
694         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
695         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
696         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
697         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
698         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
699         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
700         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
701         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
702         /* free up old matrix storage */
703         ierr = PetscFree(a->a);CHKERRQ(ierr);
704         if (!a->singlemalloc) {
705           ierr = PetscFree(a->i);CHKERRQ(ierr);
706           ierr = PetscFree(a->j);CHKERRQ(ierr);
707         }
708         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
709         a->singlemalloc = PETSC_TRUE;
710 
711         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
712         rmax = imax[row] = imax[row] + CHUNKSIZE;
713         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
714         a->s_maxnz += bs2*CHUNKSIZE;
715         a->reallocs++;
716         a->s_nz++;
717       }
718       N = nrow++ - 1;
719       /* shift up all the later entries in this row */
720       for (ii=N; ii>=i; ii--) {
721         rp[ii+1] = rp[ii];
722         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       if (N >= i) {
725         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726       }
727       rp[i] = col;
728       bap   = ap +  bs2*i;
729       if (roworiented) {
730         for (ii=0; ii<bs; ii++,value+=stepval) {
731           for (jj=ii; jj<bs2; jj+=bs) {
732             bap[jj] = *value++;
733           }
734         }
735       } else {
736         for (ii=0; ii<bs; ii++,value+=stepval) {
737           for (jj=0; jj<bs; jj++) {
738             *bap++  = *value++;
739           }
740         }
741       }
742       noinsert2:;
743       low = i;
744     }
745     ailen[row] = nrow;
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
753 {
754   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
755   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
756   int        m = A->m,*ip,N,*ailen = a->ilen;
757   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
758   MatScalar  *aa = a->a,*ap;
759 #if defined(PETSC_HAVE_SPOOLES)
760   PetscTruth   flag;
761 #endif
762 
763   PetscFunctionBegin;
764   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
765 
766   if (m) rmax = ailen[0];
767   for (i=1; i<mbs; i++) {
768     /* move each row back by the amount of empty slots (fshift) before it*/
769     fshift += imax[i-1] - ailen[i-1];
770     rmax   = PetscMax(rmax,ailen[i]);
771     if (fshift) {
772       ip = aj + ai[i]; ap = aa + bs2*ai[i];
773       N = ailen[i];
774       for (j=0; j<N; j++) {
775         ip[j-fshift] = ip[j];
776         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
777       }
778     }
779     ai[i] = ai[i-1] + ailen[i-1];
780   }
781   if (mbs) {
782     fshift += imax[mbs-1] - ailen[mbs-1];
783     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
784   }
785   /* reset ilen and imax for each row */
786   for (i=0; i<mbs; i++) {
787     ailen[i] = imax[i] = ai[i+1] - ai[i];
788   }
789   a->s_nz = ai[mbs];
790 
791   /* diagonals may have moved, reset it */
792   if (a->diag) {
793     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
794   }
795   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
796            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
797   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
798            a->reallocs);
799   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
800   a->reallocs          = 0;
801   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
802 
803 #if defined(PETSC_HAVE_SPOOLES)
804   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
805   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
806 #endif
807 
808   PetscFunctionReturn(0);
809 }
810 
811 /*
812    This function returns an array of flags which indicate the locations of contiguous
813    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
814    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
815    Assume: sizes should be long enough to hold all the values.
816 */
817 #undef __FUNCT__
818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
819 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
820 {
821   int        i,j,k,row;
822   PetscTruth flg;
823 
824   PetscFunctionBegin;
825   for (i=0,j=0; i<n; j++) {
826     row = idx[i];
827     if (row%bs!=0) { /* Not the begining of a block */
828       sizes[j] = 1;
829       i++;
830     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
831       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
832       i++;
833     } else { /* Begining of the block, so check if the complete block exists */
834       flg = PETSC_TRUE;
835       for (k=1; k<bs; k++) {
836         if (row+k != idx[i+k]) { /* break in the block */
837           flg = PETSC_FALSE;
838           break;
839         }
840       }
841       if (flg == PETSC_TRUE) { /* No break in the bs */
842         sizes[j] = bs;
843         i+= bs;
844       } else {
845         sizes[j] = 1;
846         i++;
847       }
848     }
849   }
850   *bs_max = j;
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
857 {
858   PetscFunctionBegin;
859   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
860 }
861 
862 /* Only add/insert a(i,j) with i<=j (blocks).
863    Any a(i,j) with i>j input by user is ingored.
864 */
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
868 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
869 {
870   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
871   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
872   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
873   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
874   int         ridx,cidx,bs2=a->bs2,ierr;
875   MatScalar   *ap,value,*aa=a->a,*bap;
876 
877   PetscFunctionBegin;
878 
879   for (k=0; k<m; k++) { /* loop over added rows */
880     row  = im[k];       /* row number */
881     brow = row/bs;      /* block row number */
882     if (row < 0) continue;
883 #if defined(PETSC_USE_BOPT_g)
884     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
885 #endif
886     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
887     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
888     rmax = imax[brow];  /* maximum space allocated for this row */
889     nrow = ailen[brow]; /* actual length of this row */
890     low  = 0;
891 
892     for (l=0; l<n; l++) { /* loop over added columns */
893       if (in[l] < 0) continue;
894 #if defined(PETSC_USE_BOPT_g)
895       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
896 #endif
897       col = in[l];
898       bcol = col/bs;              /* block col number */
899 
900       if (brow <= bcol){
901         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
902         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
903           /* element value a(k,l) */
904           if (roworiented) {
905             value = v[l + k*n];
906           } else {
907             value = v[k + l*m];
908           }
909 
910           /* move pointer bap to a(k,l) quickly and add/insert value */
911           if (!sorted) low = 0; high = nrow;
912           while (high-low > 7) {
913             t = (low+high)/2;
914             if (rp[t] > bcol) high = t;
915             else              low  = t;
916           }
917           for (i=low; i<high; i++) {
918             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
919             if (rp[i] > bcol) break;
920             if (rp[i] == bcol) {
921               bap  = ap +  bs2*i + bs*cidx + ridx;
922               if (is == ADD_VALUES) *bap += value;
923               else                  *bap  = value;
924               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
925               if (brow == bcol && ridx < cidx){
926                 bap  = ap +  bs2*i + bs*ridx + cidx;
927                 if (is == ADD_VALUES) *bap += value;
928                 else                  *bap  = value;
929               }
930               goto noinsert1;
931             }
932           }
933 
934       if (nonew == 1) goto noinsert1;
935       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
936       if (nrow >= rmax) {
937         /* there is no extra room in row, therefore enlarge */
938         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
939         MatScalar *new_a;
940 
941         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
942 
943         /* Malloc new storage space */
944         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
945         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
946         new_j = (int*)(new_a + bs2*new_nz);
947         new_i = new_j + new_nz;
948 
949         /* copy over old data into new slots */
950         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
951         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
952         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
953         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
954         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
955         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
956         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
957         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
958         /* free up old matrix storage */
959         ierr = PetscFree(a->a);CHKERRQ(ierr);
960         if (!a->singlemalloc) {
961           ierr = PetscFree(a->i);CHKERRQ(ierr);
962           ierr = PetscFree(a->j);CHKERRQ(ierr);
963         }
964         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
965         a->singlemalloc = PETSC_TRUE;
966 
967         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
968         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
969         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
970         a->s_maxnz += bs2*CHUNKSIZE;
971         a->reallocs++;
972         a->s_nz++;
973       }
974 
975       N = nrow++ - 1;
976       /* shift up all the later entries in this row */
977       for (ii=N; ii>=i; ii--) {
978         rp[ii+1] = rp[ii];
979         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
980       }
981       if (N>=i) {
982         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
983       }
984       rp[i]                      = bcol;
985       ap[bs2*i + bs*cidx + ridx] = value;
986       noinsert1:;
987       low = i;
988       /* } */
989         }
990       } /* end of if .. if..  */
991     }                     /* end of loop over added columns */
992     ailen[brow] = nrow;
993   }                       /* end of loop over added rows */
994 
995   PetscFunctionReturn(0);
996 }
997 
998 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
999 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
1000 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
1001 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1002 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1003 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1004 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1005 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1006 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1007 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1008 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1009 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1010 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1011 extern int MatZeroEntries_SeqSBAIJ(Mat);
1012 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1013 
1014 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1021 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1028 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1029 
1030 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1038 
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1047 
1048 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1056 
1057 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1065 
1066 #ifdef HAVE_MatICCFactor
1067 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1070 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
1071 {
1072   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1073   Mat         outA;
1074   int         ierr;
1075   PetscTruth  row_identity,col_identity;
1076 
1077   PetscFunctionBegin;
1078   /*
1079   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1080   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1081   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1082   if (!row_identity || !col_identity) {
1083     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1084   }
1085   */
1086 
1087   outA          = inA;
1088   inA->factor   = FACTOR_CHOLESKY;
1089 
1090   if (!a->diag) {
1091     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1092   }
1093   /*
1094       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1095       for ILU(0) factorization with natural ordering
1096   */
1097   switch (a->bs) {
1098   case 1:
1099     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1100     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1101     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1102   case 2:
1103     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1104     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1105     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1106     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1107     break;
1108   case 3:
1109     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1110     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1111     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1112     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1113     break;
1114   case 4:
1115     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1116     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1117     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1118     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1119     break;
1120   case 5:
1121     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1122     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1123     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1124     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1125     break;
1126   case 6:
1127     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1128     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1129     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1130     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1131     break;
1132   case 7:
1133     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1134     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1135     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1136     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1137     break;
1138   default:
1139     a->row        = row;
1140     a->icol       = col;
1141     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1142     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1143 
1144     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1145     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1146     PetscLogObjectParent(inA,a->icol);
1147 
1148     if (!a->solve_work) {
1149       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1150       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1151     }
1152   }
1153 
1154   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1155 
1156   PetscFunctionReturn(0);
1157 }
1158 #endif
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1162 int MatPrintHelp_SeqSBAIJ(Mat A)
1163 {
1164   static PetscTruth called = PETSC_FALSE;
1165   MPI_Comm          comm = A->comm;
1166   int               ierr;
1167 
1168   PetscFunctionBegin;
1169   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1170   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1171   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 EXTERN_C_BEGIN
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1178 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1179 {
1180   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1181   int         i,nz,n;
1182 
1183   PetscFunctionBegin;
1184   nz = baij->s_maxnz;
1185   n  = mat->n;
1186   for (i=0; i<nz; i++) {
1187     baij->j[i] = indices[i];
1188   }
1189   baij->s_nz = nz;
1190   for (i=0; i<n; i++) {
1191     baij->ilen[i] = baij->imax[i];
1192   }
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1200 /*@
1201     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1202        in the matrix.
1203 
1204   Input Parameters:
1205 +  mat     - the SeqSBAIJ matrix
1206 -  indices - the column indices
1207 
1208   Level: advanced
1209 
1210   Notes:
1211     This can be called if you have precomputed the nonzero structure of the
1212   matrix and want to provide it to the matrix object to improve the performance
1213   of the MatSetValues() operation.
1214 
1215     You MUST have set the correct numbers of nonzeros per row in the call to
1216   MatCreateSeqSBAIJ().
1217 
1218     MUST be called before any calls to MatSetValues()
1219 
1220 .seealso: MatCreateSeqSBAIJ
1221 @*/
1222 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1223 {
1224   int ierr,(*f)(Mat,int *);
1225 
1226   PetscFunctionBegin;
1227   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1228   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1229   if (f) {
1230     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1231   } else {
1232     SETERRQ(1,"Wrong type of matrix to set column indices");
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1239 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1240 {
1241   int        ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #include "petscblaslapack.h"
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1251 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1252 {
1253   int          ierr,one=1;
1254   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1255 
1256   PetscFunctionBegin;
1257   if (str == SAME_NONZERO_PATTERN) {
1258     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1259   } else {
1260     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 /* -------------------------------------------------------------------*/
1266 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1267        MatGetRow_SeqSBAIJ,
1268        MatRestoreRow_SeqSBAIJ,
1269        MatMult_SeqSBAIJ_N,
1270        MatMultAdd_SeqSBAIJ_N,
1271        MatMultTranspose_SeqSBAIJ,
1272        MatMultTransposeAdd_SeqSBAIJ,
1273        MatSolve_SeqSBAIJ_N,
1274        0,
1275        0,
1276        0,
1277        0,
1278        MatCholeskyFactor_SeqSBAIJ,
1279        MatRelax_SeqSBAIJ,
1280        MatTranspose_SeqSBAIJ,
1281        MatGetInfo_SeqSBAIJ,
1282        MatEqual_SeqSBAIJ,
1283        MatGetDiagonal_SeqSBAIJ,
1284        MatDiagonalScale_SeqSBAIJ,
1285        MatNorm_SeqSBAIJ,
1286        0,
1287        MatAssemblyEnd_SeqSBAIJ,
1288        0,
1289        MatSetOption_SeqSBAIJ,
1290        MatZeroEntries_SeqSBAIJ,
1291        MatZeroRows_SeqSBAIJ,
1292        0,
1293        0,
1294        MatCholeskyFactorSymbolic_SeqSBAIJ,
1295        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1296        MatSetUpPreallocation_SeqSBAIJ,
1297        0,
1298        MatICCFactorSymbolic_SeqSBAIJ,
1299        0,
1300        0,
1301        MatDuplicate_SeqSBAIJ,
1302        0,
1303        0,
1304        0,
1305        0,
1306        MatAXPY_SeqSBAIJ,
1307        MatGetSubMatrices_SeqSBAIJ,
1308        MatIncreaseOverlap_SeqSBAIJ,
1309        MatGetValues_SeqSBAIJ,
1310        0,
1311        MatPrintHelp_SeqSBAIJ,
1312        MatScale_SeqSBAIJ,
1313        0,
1314        0,
1315        0,
1316        MatGetBlockSize_SeqSBAIJ,
1317        MatGetRowIJ_SeqSBAIJ,
1318        MatRestoreRowIJ_SeqSBAIJ,
1319        0,
1320        0,
1321        0,
1322        0,
1323        0,
1324        0,
1325        MatSetValuesBlocked_SeqSBAIJ,
1326        MatGetSubMatrix_SeqSBAIJ,
1327        0,
1328        0,
1329        MatGetPetscMaps_Petsc,
1330        0,
1331        0,
1332        0,
1333        0,
1334        0,
1335        0,
1336        MatGetRowMax_SeqSBAIJ};
1337 
1338 EXTERN_C_BEGIN
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1341 int MatStoreValues_SeqSBAIJ(Mat mat)
1342 {
1343   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1344   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1345   int         ierr;
1346 
1347   PetscFunctionBegin;
1348   if (aij->nonew != 1) {
1349     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1350   }
1351 
1352   /* allocate space for values if not already there */
1353   if (!aij->saved_values) {
1354     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1355   }
1356 
1357   /* copy values over */
1358   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 EXTERN_C_END
1362 
1363 EXTERN_C_BEGIN
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1366 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1367 {
1368   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1369   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1370 
1371   PetscFunctionBegin;
1372   if (aij->nonew != 1) {
1373     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1374   }
1375   if (!aij->saved_values) {
1376     SETERRQ(1,"Must call MatStoreValues(A);first");
1377   }
1378 
1379   /* copy values over */
1380   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 EXTERN_C_END
1384 
1385 EXTERN_C_BEGIN
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1388 int MatCreate_SeqSBAIJ(Mat B)
1389 {
1390   Mat_SeqSBAIJ *b;
1391   int          ierr,size;
1392 
1393   PetscFunctionBegin;
1394   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1395   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1396   B->m = B->M = PetscMax(B->m,B->M);
1397   B->n = B->N = PetscMax(B->n,B->N);
1398 
1399   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1400   B->data = (void*)b;
1401   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1402   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1403   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1404   B->ops->view        = MatView_SeqSBAIJ;
1405   B->factor           = 0;
1406   B->lupivotthreshold = 1.0;
1407   B->mapping          = 0;
1408   b->row              = 0;
1409   b->icol             = 0;
1410   b->reallocs         = 0;
1411   b->saved_values     = 0;
1412 
1413   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1414   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1415 
1416   b->sorted           = PETSC_FALSE;
1417   b->roworiented      = PETSC_TRUE;
1418   b->nonew            = 0;
1419   b->diag             = 0;
1420   b->solve_work       = 0;
1421   b->mult_work        = 0;
1422   B->spptr            = 0;
1423   b->keepzeroedrows   = PETSC_FALSE;
1424 
1425   b->inew             = 0;
1426   b->jnew             = 0;
1427   b->anew             = 0;
1428   b->a2anew           = 0;
1429   b->permute          = PETSC_FALSE;
1430 
1431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1432                                      "MatStoreValues_SeqSBAIJ",
1433                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1435                                      "MatRetrieveValues_SeqSBAIJ",
1436                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1438                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1439                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 EXTERN_C_END
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1446 /*@C
1447    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1448    compressed row) format.  For good matrix assembly performance the
1449    user should preallocate the matrix storage by setting the parameter nz
1450    (or the array nnz).  By setting these parameters accurately, performance
1451    during matrix assembly can be increased by more than a factor of 50.
1452 
1453    Collective on Mat
1454 
1455    Input Parameters:
1456 +  A - the symmetric matrix
1457 .  bs - size of block
1458 .  nz - number of block nonzeros per block row (same for all rows)
1459 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1460          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1461 
1462    Options Database Keys:
1463 .   -mat_no_unroll - uses code that does not unroll the loops in the
1464                      block calculations (much slower)
1465 .    -mat_block_size - size of the blocks to use
1466 
1467    Level: intermediate
1468 
1469    Notes:
1470    Specify the preallocated storage with either nz or nnz (not both).
1471    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1472    allocation.  For additional details, see the users manual chapter on
1473    matrices.
1474 
1475 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1476 @*/
1477 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1478 {
1479   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1480   int          i,len,ierr,mbs,bs2;
1481   PetscTruth   flg;
1482   int          s_nz;
1483 
1484   PetscFunctionBegin;
1485   B->preallocated = PETSC_TRUE;
1486   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1487   mbs  = B->m/bs;
1488   bs2  = bs*bs;
1489 
1490   if (mbs*bs != B->m) {
1491     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1492   }
1493 
1494   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1495   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1496   if (nnz) {
1497     for (i=0; i<mbs; i++) {
1498       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1499       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1500     }
1501   }
1502 
1503   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1504   if (!flg) {
1505     switch (bs) {
1506     case 1:
1507       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1508       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1509       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1510       B->ops->mult            = MatMult_SeqSBAIJ_1;
1511       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1512       break;
1513     case 2:
1514       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1515       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1516       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1517       B->ops->mult            = MatMult_SeqSBAIJ_2;
1518       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1519       break;
1520     case 3:
1521       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1522       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1523       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1524       B->ops->mult            = MatMult_SeqSBAIJ_3;
1525       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1526       break;
1527     case 4:
1528       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1529       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1530       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1531       B->ops->mult            = MatMult_SeqSBAIJ_4;
1532       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1533       break;
1534     case 5:
1535       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1536       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1537       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1538       B->ops->mult            = MatMult_SeqSBAIJ_5;
1539       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1540       break;
1541     case 6:
1542       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1543       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1544       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1545       B->ops->mult            = MatMult_SeqSBAIJ_6;
1546       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1547       break;
1548     case 7:
1549       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1550       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1551       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1552       B->ops->mult            = MatMult_SeqSBAIJ_7;
1553       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1554       break;
1555     }
1556   }
1557 
1558   b->mbs = mbs;
1559   b->nbs = mbs;
1560   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1561   if (!nnz) {
1562     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1563     else if (nz <= 0)        nz = 1;
1564     for (i=0; i<mbs; i++) {
1565       b->imax[i] = nz;
1566     }
1567     nz = nz*mbs; /* total nz */
1568   } else {
1569     nz = 0;
1570     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1571   }
1572   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1573   s_nz = nz;
1574 
1575   /* allocate the matrix space */
1576   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1577   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1578   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1579   b->j = (int*)(b->a + s_nz*bs2);
1580   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1581   b->i = b->j + s_nz;
1582   b->singlemalloc = PETSC_TRUE;
1583 
1584   /* pointer to beginning of each row */
1585   b->i[0] = 0;
1586   for (i=1; i<mbs+1; i++) {
1587     b->i[i] = b->i[i-1] + b->imax[i-1];
1588   }
1589 
1590   /* b->ilen will count nonzeros in each block row so far. */
1591   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1592   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1593   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1594 
1595   b->bs               = bs;
1596   b->bs2              = bs2;
1597   b->s_nz             = 0;
1598   b->s_maxnz          = s_nz*bs2;
1599 
1600   b->inew             = 0;
1601   b->jnew             = 0;
1602   b->anew             = 0;
1603   b->a2anew           = 0;
1604   b->permute          = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatCreateSeqSBAIJ"
1611 /*@C
1612    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1613    compressed row) format.  For good matrix assembly performance the
1614    user should preallocate the matrix storage by setting the parameter nz
1615    (or the array nnz).  By setting these parameters accurately, performance
1616    during matrix assembly can be increased by more than a factor of 50.
1617 
1618    Collective on MPI_Comm
1619 
1620    Input Parameters:
1621 +  comm - MPI communicator, set to PETSC_COMM_SELF
1622 .  bs - size of block
1623 .  m - number of rows, or number of columns
1624 .  nz - number of block nonzeros per block row (same for all rows)
1625 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1626          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1627 
1628    Output Parameter:
1629 .  A - the symmetric matrix
1630 
1631    Options Database Keys:
1632 .   -mat_no_unroll - uses code that does not unroll the loops in the
1633                      block calculations (much slower)
1634 .    -mat_block_size - size of the blocks to use
1635 
1636    Level: intermediate
1637 
1638    Notes:
1639 
1640    Specify the preallocated storage with either nz or nnz (not both).
1641    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1642    allocation.  For additional details, see the users manual chapter on
1643    matrices.
1644 
1645 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1646 @*/
1647 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1648 {
1649   int ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1653   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1654   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1660 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1661 {
1662   Mat         C;
1663   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1664   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1665 
1666   PetscFunctionBegin;
1667   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1668 
1669   *B = 0;
1670   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1671   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1672   c    = (Mat_SeqSBAIJ*)C->data;
1673 
1674   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1675   C->preallocated   = PETSC_TRUE;
1676   C->factor         = A->factor;
1677   c->row            = 0;
1678   c->icol           = 0;
1679   c->saved_values   = 0;
1680   c->keepzeroedrows = a->keepzeroedrows;
1681   C->assembled      = PETSC_TRUE;
1682 
1683   c->bs         = a->bs;
1684   c->bs2        = a->bs2;
1685   c->mbs        = a->mbs;
1686   c->nbs        = a->nbs;
1687 
1688   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1689   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1690   for (i=0; i<mbs; i++) {
1691     c->imax[i] = a->imax[i];
1692     c->ilen[i] = a->ilen[i];
1693   }
1694 
1695   /* allocate the matrix space */
1696   c->singlemalloc = PETSC_TRUE;
1697   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1698   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1699   c->j = (int*)(c->a + nz*bs2);
1700   c->i = c->j + nz;
1701   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1702   if (mbs > 0) {
1703     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1704     if (cpvalues == MAT_COPY_VALUES) {
1705       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1706     } else {
1707       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1708     }
1709   }
1710 
1711   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1712   c->sorted      = a->sorted;
1713   c->roworiented = a->roworiented;
1714   c->nonew       = a->nonew;
1715 
1716   if (a->diag) {
1717     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1718     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1719     for (i=0; i<mbs; i++) {
1720       c->diag[i] = a->diag[i];
1721     }
1722   } else c->diag        = 0;
1723   c->s_nz               = a->s_nz;
1724   c->s_maxnz            = a->s_maxnz;
1725   c->solve_work         = 0;
1726   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1727   c->mult_work          = 0;
1728   *B = C;
1729   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1736 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1737 {
1738   Mat_SeqSBAIJ *a;
1739   Mat          B;
1740   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1741   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1742   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1743   int          *masked,nmask,tmp,bs2,ishift;
1744   PetscScalar  *aa;
1745   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1746 
1747   PetscFunctionBegin;
1748   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1749   bs2  = bs*bs;
1750 
1751   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1752   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1753   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1754   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1755   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1756   M = header[1]; N = header[2]; nz = header[3];
1757 
1758   if (header[3] < 0) {
1759     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1760   }
1761 
1762   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1763 
1764   /*
1765      This code adds extra rows to make sure the number of rows is
1766     divisible by the blocksize
1767   */
1768   mbs        = M/bs;
1769   extra_rows = bs - M + bs*(mbs);
1770   if (extra_rows == bs) extra_rows = 0;
1771   else                  mbs++;
1772   if (extra_rows) {
1773     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1774   }
1775 
1776   /* read in row lengths */
1777   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1778   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1779   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1780 
1781   /* read in column indices */
1782   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1783   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1784   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1785 
1786   /* loop over row lengths determining block row lengths */
1787   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1788   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1789   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1790   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1791   masked   = mask + mbs;
1792   rowcount = 0; nzcount = 0;
1793   for (i=0; i<mbs; i++) {
1794     nmask = 0;
1795     for (j=0; j<bs; j++) {
1796       kmax = rowlengths[rowcount];
1797       for (k=0; k<kmax; k++) {
1798         tmp = jj[nzcount++]/bs;   /* block col. index */
1799         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1800       }
1801       rowcount++;
1802     }
1803     s_browlengths[i] += nmask;
1804 
1805     /* zero out the mask elements we set */
1806     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1807   }
1808 
1809   /* create our matrix */
1810   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1811   B = *A;
1812   a = (Mat_SeqSBAIJ*)B->data;
1813 
1814   /* set matrix "i" values */
1815   a->i[0] = 0;
1816   for (i=1; i<= mbs; i++) {
1817     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1818     a->ilen[i-1] = s_browlengths[i-1];
1819   }
1820   a->s_nz = a->i[mbs];
1821 
1822   /* read in nonzero values */
1823   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1824   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1825   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1826 
1827   /* set "a" and "j" values into matrix */
1828   nzcount = 0; jcount = 0;
1829   for (i=0; i<mbs; i++) {
1830     nzcountb = nzcount;
1831     nmask    = 0;
1832     for (j=0; j<bs; j++) {
1833       kmax = rowlengths[i*bs+j];
1834       for (k=0; k<kmax; k++) {
1835         tmp = jj[nzcount++]/bs; /* block col. index */
1836         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1837       }
1838     }
1839     /* sort the masked values */
1840     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1841 
1842     /* set "j" values into matrix */
1843     maskcount = 1;
1844     for (j=0; j<nmask; j++) {
1845       a->j[jcount++]  = masked[j];
1846       mask[masked[j]] = maskcount++;
1847     }
1848 
1849     /* set "a" values into matrix */
1850     ishift = bs2*a->i[i];
1851     for (j=0; j<bs; j++) {
1852       kmax = rowlengths[i*bs+j];
1853       for (k=0; k<kmax; k++) {
1854         tmp       = jj[nzcountb]/bs ; /* block col. index */
1855         if (tmp >= i){
1856           block     = mask[tmp] - 1;
1857           point     = jj[nzcountb] - bs*tmp;
1858           idx       = ishift + bs2*block + j + bs*point;
1859           a->a[idx] = aa[nzcountb];
1860         }
1861         nzcountb++;
1862       }
1863     }
1864     /* zero out the mask elements we set */
1865     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1866   }
1867   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1868 
1869   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1870   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1871   ierr = PetscFree(aa);CHKERRQ(ierr);
1872   ierr = PetscFree(jj);CHKERRQ(ierr);
1873   ierr = PetscFree(mask);CHKERRQ(ierr);
1874 
1875   B->assembled = PETSC_TRUE;
1876   ierr = MatView_Private(B);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 EXTERN_C_END
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1883 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1884 {
1885   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1886   MatScalar    *aa=a->a,*v,*v1;
1887   PetscScalar  *x,*b,*t,sum,d;
1888   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1889   int          nz,nz1,*vj,*vj1,i;
1890 
1891   PetscFunctionBegin;
1892   its = its*lits;
1893   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1894 
1895   if (bs > 1)
1896     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1897 
1898   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1899   if (xx != bb) {
1900     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1901   } else {
1902     b = x;
1903   }
1904 
1905   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1906 
1907   if (flag & SOR_ZERO_INITIAL_GUESS) {
1908     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1909       for (i=0; i<m; i++)
1910         t[i] = b[i];
1911 
1912       for (i=0; i<m; i++){
1913         d  = *(aa + ai[i]);  /* diag[i] */
1914         v  = aa + ai[i] + 1;
1915         vj = aj + ai[i] + 1;
1916         nz = ai[i+1] - ai[i] - 1;
1917         x[i] = omega*t[i]/d;
1918         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1919         PetscLogFlops(2*nz-1);
1920       }
1921     }
1922 
1923     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1924       for (i=0; i<m; i++)
1925         t[i] = b[i];
1926 
1927       for (i=0; i<m-1; i++){  /* update rhs */
1928         v  = aa + ai[i] + 1;
1929         vj = aj + ai[i] + 1;
1930         nz = ai[i+1] - ai[i] - 1;
1931         while (nz--) t[*vj++] -= x[i]*(*v++);
1932         PetscLogFlops(2*nz-1);
1933       }
1934       for (i=m-1; i>=0; i--){
1935         d  = *(aa + ai[i]);
1936         v  = aa + ai[i] + 1;
1937         vj = aj + ai[i] + 1;
1938         nz = ai[i+1] - ai[i] - 1;
1939         sum = t[i];
1940         while (nz--) sum -= x[*vj++]*(*v++);
1941         PetscLogFlops(2*nz-1);
1942         x[i] =   (1-omega)*x[i] + omega*sum/d;
1943       }
1944     }
1945     its--;
1946   }
1947 
1948   while (its--) {
1949     /*
1950        forward sweep:
1951        for i=0,...,m-1:
1952          sum[i] = (b[i] - U(i,:)x )/d[i];
1953          x[i]   = (1-omega)x[i] + omega*sum[i];
1954          b      = b - x[i]*U^T(i,:);
1955 
1956     */
1957     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1958       for (i=0; i<m; i++)
1959         t[i] = b[i];
1960 
1961       for (i=0; i<m; i++){
1962         d  = *(aa + ai[i]);  /* diag[i] */
1963         v  = aa + ai[i] + 1; v1=v;
1964         vj = aj + ai[i] + 1; vj1=vj;
1965         nz = ai[i+1] - ai[i] - 1; nz1=nz;
1966         sum = t[i];
1967         while (nz1--) sum -= (*v1++)*x[*vj1++];
1968         x[i] = (1-omega)*x[i] + omega*sum/d;
1969         while (nz--) t[*vj++] -= x[i]*(*v++);
1970         PetscLogFlops(4*nz-2);
1971       }
1972     }
1973 
1974   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1975       /*
1976        backward sweep:
1977        b = b - x[i]*U^T(i,:), i=0,...,n-2
1978        for i=m-1,...,0:
1979          sum[i] = (b[i] - U(i,:)x )/d[i];
1980          x[i]   = (1-omega)x[i] + omega*sum[i];
1981       */
1982       for (i=0; i<m; i++)
1983         t[i] = b[i];
1984 
1985       for (i=0; i<m-1; i++){  /* update rhs */
1986         v  = aa + ai[i] + 1;
1987         vj = aj + ai[i] + 1;
1988         nz = ai[i+1] - ai[i] - 1;
1989         while (nz--) t[*vj++] -= x[i]*(*v++);
1990         PetscLogFlops(2*nz-1);
1991       }
1992       for (i=m-1; i>=0; i--){
1993         d  = *(aa + ai[i]);
1994         v  = aa + ai[i] + 1;
1995         vj = aj + ai[i] + 1;
1996         nz = ai[i+1] - ai[i] - 1;
1997         sum = t[i];
1998         while (nz--) sum -= x[*vj++]*(*v++);
1999         PetscLogFlops(2*nz-1);
2000         x[i] =   (1-omega)*x[i] + omega*sum/d;
2001       }
2002     }
2003   }
2004 
2005   ierr = PetscFree(t); CHKERRQ(ierr);
2006   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2007   if (bb != xx) {
2008     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2009   }
2010 
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 
2015 
2016 
2017 
2018 
2019