xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision a2ec6df8280ae283e8b0f45d20ebe04f5d3dfc7b)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
560     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
565       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
595 {
596   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
597   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth      roworiented=a->roworiented;
601   const MatScalar *value = v;
602   MatScalar       *ap,*aa = a->a,*bap;
603 
604   PetscFunctionBegin;
605   if (roworiented) {
606     stepval = (n-1)*bs;
607   } else {
608     stepval = (m-1)*bs;
609   }
610   for (k=0; k<m; k++) { /* loop over added rows */
611     row  = im[k];
612     if (row < 0) continue;
613 #if defined(PETSC_USE_BOPT_g)
614     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
615 #endif
616     rp   = aj + ai[row];
617     ap   = aa + bs2*ai[row];
618     rmax = imax[row];
619     nrow = ailen[row];
620     low  = 0;
621     for (l=0; l<n; l++) { /* loop over added columns */
622       if (in[l] < 0) continue;
623       col = in[l];
624 #if defined(PETSC_USE_BOPT_g)
625       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
626 #endif
627       if (col < row) continue; /* ignore lower triangular block */
628       if (roworiented) {
629         value = v + k*(stepval+bs)*bs + l*bs;
630       } else {
631         value = v + l*(stepval+bs)*bs + k*bs;
632       }
633       if (!sorted) low = 0; high = nrow;
634       while (high-low > 7) {
635         t = (low+high)/2;
636         if (rp[t] > col) high = t;
637         else             low  = t;
638       }
639       for (i=low; i<high; i++) {
640         if (rp[i] > col) break;
641         if (rp[i] == col) {
642           bap  = ap +  bs2*i;
643           if (roworiented) {
644             if (is == ADD_VALUES) {
645               for (ii=0; ii<bs; ii++,value+=stepval) {
646                 for (jj=ii; jj<bs2; jj+=bs) {
647                   bap[jj] += *value++;
648                 }
649               }
650             } else {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=ii; jj<bs2; jj+=bs) {
653                   bap[jj] = *value++;
654                 }
655               }
656             }
657           } else {
658             if (is == ADD_VALUES) {
659               for (ii=0; ii<bs; ii++,value+=stepval) {
660                 for (jj=0; jj<bs; jj++) {
661                   *bap++ += *value++;
662                 }
663               }
664             } else {
665               for (ii=0; ii<bs; ii++,value+=stepval) {
666                 for (jj=0; jj<bs; jj++) {
667                   *bap++  = *value++;
668                 }
669               }
670             }
671           }
672           goto noinsert2;
673         }
674       }
675       if (nonew == 1) goto noinsert2;
676       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
677       if (nrow >= rmax) {
678         /* there is no extra room in row, therefore enlarge */
679         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
680         MatScalar *new_a;
681 
682         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
683 
684         /* malloc new storage space */
685         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
686 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
687         new_j   = (int*)(new_a + bs2*new_nz);
688         new_i   = new_j + new_nz;
689 
690         /* copy over old data into new slots */
691         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
692         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
693         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
694         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
695         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
698         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
699         /* free up old matrix storage */
700         ierr = PetscFree(a->a);CHKERRQ(ierr);
701         if (!a->singlemalloc) {
702           ierr = PetscFree(a->i);CHKERRQ(ierr);
703           ierr = PetscFree(a->j);CHKERRQ(ierr);
704         }
705         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
706         a->singlemalloc = PETSC_TRUE;
707 
708         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
709         rmax = imax[row] = imax[row] + CHUNKSIZE;
710         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
711         a->s_maxnz += bs2*CHUNKSIZE;
712         a->reallocs++;
713         a->s_nz++;
714       }
715       N = nrow++ - 1;
716       /* shift up all the later entries in this row */
717       for (ii=N; ii>=i; ii--) {
718         rp[ii+1] = rp[ii];
719         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
720       }
721       if (N >= i) {
722         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       rp[i] = col;
725       bap   = ap +  bs2*i;
726       if (roworiented) {
727         for (ii=0; ii<bs; ii++,value+=stepval) {
728           for (jj=ii; jj<bs2; jj+=bs) {
729             bap[jj] = *value++;
730           }
731         }
732       } else {
733         for (ii=0; ii<bs; ii++,value+=stepval) {
734           for (jj=0; jj<bs; jj++) {
735             *bap++  = *value++;
736           }
737         }
738       }
739       noinsert2:;
740       low = i;
741     }
742     ailen[row] = nrow;
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
750 {
751   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
753   int        m = A->m,*ip,N,*ailen = a->ilen;
754   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
755   MatScalar  *aa = a->a,*ap;
756 
757   PetscFunctionBegin;
758   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
759 
760   if (m) rmax = ailen[0];
761   for (i=1; i<mbs; i++) {
762     /* move each row back by the amount of empty slots (fshift) before it*/
763     fshift += imax[i-1] - ailen[i-1];
764     rmax   = PetscMax(rmax,ailen[i]);
765     if (fshift) {
766       ip = aj + ai[i]; ap = aa + bs2*ai[i];
767       N = ailen[i];
768       for (j=0; j<N; j++) {
769         ip[j-fshift] = ip[j];
770         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
771       }
772     }
773     ai[i] = ai[i-1] + ailen[i-1];
774   }
775   if (mbs) {
776     fshift += imax[mbs-1] - ailen[mbs-1];
777     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
778   }
779   /* reset ilen and imax for each row */
780   for (i=0; i<mbs; i++) {
781     ailen[i] = imax[i] = ai[i+1] - ai[i];
782   }
783   a->s_nz = ai[mbs];
784 
785   /* diagonals may have moved, reset it */
786   if (a->diag) {
787     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
788   }
789   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
790            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
791   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
792            a->reallocs);
793   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
794   a->reallocs          = 0;
795   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
796 
797   PetscFunctionReturn(0);
798 }
799 
800 /*
801    This function returns an array of flags which indicate the locations of contiguous
802    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
803    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
804    Assume: sizes should be long enough to hold all the values.
805 */
806 #undef __FUNCT__
807 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
808 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
809 {
810   int        i,j,k,row;
811   PetscTruth flg;
812 
813   PetscFunctionBegin;
814   for (i=0,j=0; i<n; j++) {
815     row = idx[i];
816     if (row%bs!=0) { /* Not the begining of a block */
817       sizes[j] = 1;
818       i++;
819     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
820       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
821       i++;
822     } else { /* Begining of the block, so check if the complete block exists */
823       flg = PETSC_TRUE;
824       for (k=1; k<bs; k++) {
825         if (row+k != idx[i+k]) { /* break in the block */
826           flg = PETSC_FALSE;
827           break;
828         }
829       }
830       if (flg == PETSC_TRUE) { /* No break in the bs */
831         sizes[j] = bs;
832         i+= bs;
833       } else {
834         sizes[j] = 1;
835         i++;
836       }
837     }
838   }
839   *bs_max = j;
840   PetscFunctionReturn(0);
841 }
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
845 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
846 {
847   PetscFunctionBegin;
848   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
849 }
850 
851 /* Only add/insert a(i,j) with i<=j (blocks).
852    Any a(i,j) with i>j input by user is ingored.
853 */
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
857 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
858 {
859   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
860   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
861   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
862   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
863   int         ridx,cidx,bs2=a->bs2,ierr;
864   MatScalar   *ap,value,*aa=a->a,*bap;
865 
866   PetscFunctionBegin;
867 
868   for (k=0; k<m; k++) { /* loop over added rows */
869     row  = im[k];       /* row number */
870     brow = row/bs;      /* block row number */
871     if (row < 0) continue;
872 #if defined(PETSC_USE_BOPT_g)
873     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
874 #endif
875     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
876     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
877     rmax = imax[brow];  /* maximum space allocated for this row */
878     nrow = ailen[brow]; /* actual length of this row */
879     low  = 0;
880 
881     for (l=0; l<n; l++) { /* loop over added columns */
882       if (in[l] < 0) continue;
883 #if defined(PETSC_USE_BOPT_g)
884       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
885 #endif
886       col = in[l];
887       bcol = col/bs;              /* block col number */
888 
889       if (brow <= bcol){
890         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
891         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
892           /* element value a(k,l) */
893           if (roworiented) {
894             value = v[l + k*n];
895           } else {
896             value = v[k + l*m];
897           }
898 
899           /* move pointer bap to a(k,l) quickly and add/insert value */
900           if (!sorted) low = 0; high = nrow;
901           while (high-low > 7) {
902             t = (low+high)/2;
903             if (rp[t] > bcol) high = t;
904             else              low  = t;
905           }
906           for (i=low; i<high; i++) {
907             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
908             if (rp[i] > bcol) break;
909             if (rp[i] == bcol) {
910               bap  = ap +  bs2*i + bs*cidx + ridx;
911               if (is == ADD_VALUES) *bap += value;
912               else                  *bap  = value;
913               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
914               if (brow == bcol && ridx < cidx){
915                 bap  = ap +  bs2*i + bs*ridx + cidx;
916                 if (is == ADD_VALUES) *bap += value;
917                 else                  *bap  = value;
918               }
919               goto noinsert1;
920             }
921           }
922 
923       if (nonew == 1) goto noinsert1;
924       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
925       if (nrow >= rmax) {
926         /* there is no extra room in row, therefore enlarge */
927         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
928         MatScalar *new_a;
929 
930         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931 
932         /* Malloc new storage space */
933         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
934         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
935         new_j = (int*)(new_a + bs2*new_nz);
936         new_i = new_j + new_nz;
937 
938         /* copy over old data into new slots */
939         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
940         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
941         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
942         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
943         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
944         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
945         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
946         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
947         /* free up old matrix storage */
948         ierr = PetscFree(a->a);CHKERRQ(ierr);
949         if (!a->singlemalloc) {
950           ierr = PetscFree(a->i);CHKERRQ(ierr);
951           ierr = PetscFree(a->j);CHKERRQ(ierr);
952         }
953         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
954         a->singlemalloc = PETSC_TRUE;
955 
956         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
957         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
958         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
959         a->s_maxnz += bs2*CHUNKSIZE;
960         a->reallocs++;
961         a->s_nz++;
962       }
963 
964       N = nrow++ - 1;
965       /* shift up all the later entries in this row */
966       for (ii=N; ii>=i; ii--) {
967         rp[ii+1] = rp[ii];
968         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
969       }
970       if (N>=i) {
971         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
972       }
973       rp[i]                      = bcol;
974       ap[bs2*i + bs*cidx + ridx] = value;
975       noinsert1:;
976       low = i;
977       /* } */
978         }
979       } /* end of if .. if..  */
980     }                     /* end of loop over added columns */
981     ailen[brow] = nrow;
982   }                       /* end of loop over added rows */
983 
984   PetscFunctionReturn(0);
985 }
986 
987 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
988 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
989 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
990 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
991 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
992 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
993 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
994 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
995 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
996 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
997 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
998 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1000 extern int MatZeroEntries_SeqSBAIJ(Mat);
1001 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1002 
1003 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1004 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1005 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1006 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1007 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1008 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1009 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1011 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1012 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1013 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1014 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1015 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1016 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1018 
1019 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1020 
1021 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1022 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1023 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1024 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1025 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1029 
1030 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1031 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1032 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1038 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1039 
1040 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1041 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1042 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1043 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1048 
1049 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1050 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1051 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1052 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1057 
1058 #ifdef HAVE_MatICCFactor
1059 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1062 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1063 {
1064   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1065   Mat         outA;
1066   int         ierr;
1067   PetscTruth  row_identity,col_identity;
1068 
1069   PetscFunctionBegin;
1070   /*
1071   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1072   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1073   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1074   if (!row_identity || !col_identity) {
1075     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1076   }
1077   */
1078 
1079   outA          = inA;
1080   inA->factor   = FACTOR_CHOLESKY;
1081 
1082   if (!a->diag) {
1083     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1084   }
1085   /*
1086       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1087       for ILU(0) factorization with natural ordering
1088   */
1089   switch (a->bs) {
1090   case 1:
1091     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1092     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1093     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1094     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1095   case 2:
1096     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1097     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1098     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1099     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1100     break;
1101   case 3:
1102     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1103     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1104     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1105     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1106     break;
1107   case 4:
1108     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1109     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1110     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1111     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1112     break;
1113   case 5:
1114     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1115     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1116     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1117     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1118     break;
1119   case 6:
1120     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1121     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1122     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1123     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1124     break;
1125   case 7:
1126     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1127     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1128     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1129     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1130     break;
1131   default:
1132     a->row        = row;
1133     a->icol       = col;
1134     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1135     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1136 
1137     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1138     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1139     PetscLogObjectParent(inA,a->icol);
1140 
1141     if (!a->solve_work) {
1142       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1143       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1144     }
1145   }
1146 
1147   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 #endif
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1155 int MatPrintHelp_SeqSBAIJ(Mat A)
1156 {
1157   static PetscTruth called = PETSC_FALSE;
1158   MPI_Comm          comm = A->comm;
1159   int               ierr;
1160 
1161   PetscFunctionBegin;
1162   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1163   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1164   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 EXTERN_C_BEGIN
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1171 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1172 {
1173   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1174   int         i,nz,n;
1175 
1176   PetscFunctionBegin;
1177   nz = baij->s_maxnz;
1178   n  = mat->n;
1179   for (i=0; i<nz; i++) {
1180     baij->j[i] = indices[i];
1181   }
1182   baij->s_nz = nz;
1183   for (i=0; i<n; i++) {
1184     baij->ilen[i] = baij->imax[i];
1185   }
1186 
1187   PetscFunctionReturn(0);
1188 }
1189 EXTERN_C_END
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1193 /*@
1194     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1195        in the matrix.
1196 
1197   Input Parameters:
1198 +  mat     - the SeqSBAIJ matrix
1199 -  indices - the column indices
1200 
1201   Level: advanced
1202 
1203   Notes:
1204     This can be called if you have precomputed the nonzero structure of the
1205   matrix and want to provide it to the matrix object to improve the performance
1206   of the MatSetValues() operation.
1207 
1208     You MUST have set the correct numbers of nonzeros per row in the call to
1209   MatCreateSeqSBAIJ().
1210 
1211     MUST be called before any calls to MatSetValues()
1212 
1213 .seealso: MatCreateSeqSBAIJ
1214 @*/
1215 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1216 {
1217   int ierr,(*f)(Mat,int *);
1218 
1219   PetscFunctionBegin;
1220   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1221   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1222   if (f) {
1223     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1224   } else {
1225     SETERRQ(1,"Wrong type of matrix to set column indices");
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1232 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1233 {
1234   int        ierr;
1235 
1236   PetscFunctionBegin;
1237   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1243 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1244 {
1245   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1246   PetscFunctionBegin;
1247   *array = a->a;
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1253 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   PetscFunctionBegin;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #include "petscblaslapack.h"
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1262 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1263 {
1264   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1265   int          ierr,one=1,i,bs=y->bs,bs2,j;
1266 
1267   PetscFunctionBegin;
1268   if (str == SAME_NONZERO_PATTERN) {
1269     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1270   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1271     if (y->xtoy && y->XtoY != X) {
1272       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1273       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1274     }
1275     if (!y->xtoy) { /* get xtoy */
1276       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1277       y->XtoY = X;
1278     }
1279     bs2 = bs*bs;
1280     for (i=0; i<x->s_nz; i++) {
1281       j = 0;
1282       while (j < bs2){
1283         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1284         j++;
1285       }
1286     }
1287     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1288   } else {
1289     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 /* -------------------------------------------------------------------*/
1295 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1296        MatGetRow_SeqSBAIJ,
1297        MatRestoreRow_SeqSBAIJ,
1298        MatMult_SeqSBAIJ_N,
1299 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1300        MatMultTranspose_SeqSBAIJ,
1301        MatMultTransposeAdd_SeqSBAIJ,
1302        MatSolve_SeqSBAIJ_N,
1303        0,
1304        0,
1305 /*10*/ 0,
1306        0,
1307        MatCholeskyFactor_SeqSBAIJ,
1308        MatRelax_SeqSBAIJ,
1309        MatTranspose_SeqSBAIJ,
1310 /*15*/ MatGetInfo_SeqSBAIJ,
1311        MatEqual_SeqSBAIJ,
1312        MatGetDiagonal_SeqSBAIJ,
1313        MatDiagonalScale_SeqSBAIJ,
1314        MatNorm_SeqSBAIJ,
1315 /*20*/ 0,
1316        MatAssemblyEnd_SeqSBAIJ,
1317        0,
1318        MatSetOption_SeqSBAIJ,
1319        MatZeroEntries_SeqSBAIJ,
1320 /*25*/ MatZeroRows_SeqSBAIJ,
1321        0,
1322        0,
1323        MatCholeskyFactorSymbolic_SeqSBAIJ,
1324        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1325 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1326        0,
1327        MatICCFactorSymbolic_SeqSBAIJ,
1328        MatGetArray_SeqSBAIJ,
1329        MatRestoreArray_SeqSBAIJ,
1330 /*35*/ MatDuplicate_SeqSBAIJ,
1331        0,
1332        0,
1333        0,
1334        0,
1335 /*40*/ MatAXPY_SeqSBAIJ,
1336        MatGetSubMatrices_SeqSBAIJ,
1337        MatIncreaseOverlap_SeqSBAIJ,
1338        MatGetValues_SeqSBAIJ,
1339        0,
1340 /*45*/ MatPrintHelp_SeqSBAIJ,
1341        MatScale_SeqSBAIJ,
1342        0,
1343        0,
1344        0,
1345 /*50*/ MatGetBlockSize_SeqSBAIJ,
1346        MatGetRowIJ_SeqSBAIJ,
1347        MatRestoreRowIJ_SeqSBAIJ,
1348        0,
1349        0,
1350 /*55*/ 0,
1351        0,
1352        0,
1353        0,
1354        MatSetValuesBlocked_SeqSBAIJ,
1355 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1356        0,
1357        0,
1358        MatGetPetscMaps_Petsc,
1359        0,
1360 /*65*/ 0,
1361        0,
1362        0,
1363        0,
1364        0,
1365 /*70*/ MatGetRowMax_SeqSBAIJ,
1366        0,
1367        0,
1368        0,
1369        0,
1370 /*75*/ 0,
1371        0,
1372        0,
1373        0,
1374        0,
1375 /*80*/ 0,
1376        0,
1377        0,
1378        0,
1379 #if !defined(PETSC_USE_COMPLEX)
1380        MatGetInertia_SeqSBAIJ,
1381 #else
1382        0,
1383 #endif
1384 /*85*/ MatLoad_SeqSBAIJ
1385 };
1386 
1387 EXTERN_C_BEGIN
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1390 int MatStoreValues_SeqSBAIJ(Mat mat)
1391 {
1392   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1393   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1394   int         ierr;
1395 
1396   PetscFunctionBegin;
1397   if (aij->nonew != 1) {
1398     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1399   }
1400 
1401   /* allocate space for values if not already there */
1402   if (!aij->saved_values) {
1403     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1404   }
1405 
1406   /* copy values over */
1407   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 EXTERN_C_END
1411 
1412 EXTERN_C_BEGIN
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1415 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1416 {
1417   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1418   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1419 
1420   PetscFunctionBegin;
1421   if (aij->nonew != 1) {
1422     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1423   }
1424   if (!aij->saved_values) {
1425     SETERRQ(1,"Must call MatStoreValues(A);first");
1426   }
1427 
1428   /* copy values over */
1429   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 EXTERN_C_END
1433 
1434 EXTERN_C_BEGIN
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1437 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1438 {
1439   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1440   int          i,len,ierr,mbs,bs2;
1441   PetscTruth   flg;
1442   int          s_nz;
1443 
1444   PetscFunctionBegin;
1445   B->preallocated = PETSC_TRUE;
1446   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1447   mbs  = B->m/bs;
1448   bs2  = bs*bs;
1449 
1450   if (mbs*bs != B->m) {
1451     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1452   }
1453 
1454   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1455   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1456   if (nnz) {
1457     for (i=0; i<mbs; i++) {
1458       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1459       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1460     }
1461   }
1462 
1463   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1464   if (!flg) {
1465     switch (bs) {
1466     case 1:
1467       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1468       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1469       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1470       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1471       B->ops->mult            = MatMult_SeqSBAIJ_1;
1472       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1473       break;
1474     case 2:
1475       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1476       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1477       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1478       B->ops->mult            = MatMult_SeqSBAIJ_2;
1479       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1480       break;
1481     case 3:
1482       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1483       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1484       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1485       B->ops->mult            = MatMult_SeqSBAIJ_3;
1486       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1487       break;
1488     case 4:
1489       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1490       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1491       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1492       B->ops->mult            = MatMult_SeqSBAIJ_4;
1493       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1494       break;
1495     case 5:
1496       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1497       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1498       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1499       B->ops->mult            = MatMult_SeqSBAIJ_5;
1500       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1501       break;
1502     case 6:
1503       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1504       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1505       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1506       B->ops->mult            = MatMult_SeqSBAIJ_6;
1507       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1508       break;
1509     case 7:
1510       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1511       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1512       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1513       B->ops->mult            = MatMult_SeqSBAIJ_7;
1514       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1515       break;
1516     }
1517   }
1518 
1519   b->mbs = mbs;
1520   b->nbs = mbs;
1521   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1522   if (!nnz) {
1523     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1524     else if (nz <= 0)        nz = 1;
1525     for (i=0; i<mbs; i++) {
1526       b->imax[i] = nz;
1527     }
1528     nz = nz*mbs; /* total nz */
1529   } else {
1530     nz = 0;
1531     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1532   }
1533   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1534   s_nz = nz;
1535 
1536   /* allocate the matrix space */
1537   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1538   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1539   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1540   b->j = (int*)(b->a + s_nz*bs2);
1541   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1542   b->i = b->j + s_nz;
1543   b->singlemalloc = PETSC_TRUE;
1544 
1545   /* pointer to beginning of each row */
1546   b->i[0] = 0;
1547   for (i=1; i<mbs+1; i++) {
1548     b->i[i] = b->i[i-1] + b->imax[i-1];
1549   }
1550 
1551   /* b->ilen will count nonzeros in each block row so far. */
1552   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1553   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1554   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1555 
1556   b->bs               = bs;
1557   b->bs2              = bs2;
1558   b->s_nz             = 0;
1559   b->s_maxnz          = s_nz*bs2;
1560 
1561   b->inew             = 0;
1562   b->jnew             = 0;
1563   b->anew             = 0;
1564   b->a2anew           = 0;
1565   b->permute          = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 /*MC
1571    MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1572    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1573 
1574    Options Database Keys:
1575 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1576 
1577   Level: beginner
1578 
1579 .seealso: MatCreateSeqSBAIJ
1580 M*/
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1585 int MatCreate_SeqSBAIJ(Mat B)
1586 {
1587   Mat_SeqSBAIJ *b;
1588   int          ierr,size;
1589 
1590   PetscFunctionBegin;
1591   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1592   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1593   B->m = B->M = PetscMax(B->m,B->M);
1594   B->n = B->N = PetscMax(B->n,B->N);
1595 
1596   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1597   B->data = (void*)b;
1598   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1599   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1600   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1601   B->ops->view        = MatView_SeqSBAIJ;
1602   B->factor           = 0;
1603   B->lupivotthreshold = 1.0;
1604   B->mapping          = 0;
1605   b->row              = 0;
1606   b->icol             = 0;
1607   b->reallocs         = 0;
1608   b->saved_values     = 0;
1609 
1610   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1611   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1612 
1613   b->sorted           = PETSC_FALSE;
1614   b->roworiented      = PETSC_TRUE;
1615   b->nonew            = 0;
1616   b->diag             = 0;
1617   b->solve_work       = 0;
1618   b->mult_work        = 0;
1619   B->spptr            = 0;
1620   b->keepzeroedrows   = PETSC_FALSE;
1621   b->xtoy             = 0;
1622   b->XtoY             = 0;
1623 
1624   b->inew             = 0;
1625   b->jnew             = 0;
1626   b->anew             = 0;
1627   b->a2anew           = 0;
1628   b->permute          = PETSC_FALSE;
1629 
1630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1631                                      "MatStoreValues_SeqSBAIJ",
1632                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1634                                      "MatRetrieveValues_SeqSBAIJ",
1635                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1637                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1638                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1640                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1641                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1642   PetscFunctionReturn(0);
1643 }
1644 EXTERN_C_END
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1648 /*@C
1649    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1650    compressed row) format.  For good matrix assembly performance the
1651    user should preallocate the matrix storage by setting the parameter nz
1652    (or the array nnz).  By setting these parameters accurately, performance
1653    during matrix assembly can be increased by more than a factor of 50.
1654 
1655    Collective on Mat
1656 
1657    Input Parameters:
1658 +  A - the symmetric matrix
1659 .  bs - size of block
1660 .  nz - number of block nonzeros per block row (same for all rows)
1661 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1662          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1663 
1664    Options Database Keys:
1665 .   -mat_no_unroll - uses code that does not unroll the loops in the
1666                      block calculations (much slower)
1667 .    -mat_block_size - size of the blocks to use
1668 
1669    Level: intermediate
1670 
1671    Notes:
1672    Specify the preallocated storage with either nz or nnz (not both).
1673    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1674    allocation.  For additional details, see the users manual chapter on
1675    matrices.
1676 
1677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1678 @*/
1679 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1680   int ierr,(*f)(Mat,int,int,const int[]);
1681 
1682   PetscFunctionBegin;
1683   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1684   if (f) {
1685     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatCreateSeqSBAIJ"
1692 /*@C
1693    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1694    compressed row) format.  For good matrix assembly performance the
1695    user should preallocate the matrix storage by setting the parameter nz
1696    (or the array nnz).  By setting these parameters accurately, performance
1697    during matrix assembly can be increased by more than a factor of 50.
1698 
1699    Collective on MPI_Comm
1700 
1701    Input Parameters:
1702 +  comm - MPI communicator, set to PETSC_COMM_SELF
1703 .  bs - size of block
1704 .  m - number of rows, or number of columns
1705 .  nz - number of block nonzeros per block row (same for all rows)
1706 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1707          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1708 
1709    Output Parameter:
1710 .  A - the symmetric matrix
1711 
1712    Options Database Keys:
1713 .   -mat_no_unroll - uses code that does not unroll the loops in the
1714                      block calculations (much slower)
1715 .    -mat_block_size - size of the blocks to use
1716 
1717    Level: intermediate
1718 
1719    Notes:
1720 
1721    Specify the preallocated storage with either nz or nnz (not both).
1722    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1723    allocation.  For additional details, see the users manual chapter on
1724    matrices.
1725 
1726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1727 @*/
1728 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1729 {
1730   int ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1734   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1735   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1741 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1742 {
1743   Mat         C;
1744   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1745   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1746 
1747   PetscFunctionBegin;
1748   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1749 
1750   *B = 0;
1751   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1752   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1753   c    = (Mat_SeqSBAIJ*)C->data;
1754 
1755   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1756   C->preallocated   = PETSC_TRUE;
1757   C->factor         = A->factor;
1758   c->row            = 0;
1759   c->icol           = 0;
1760   c->saved_values   = 0;
1761   c->keepzeroedrows = a->keepzeroedrows;
1762   C->assembled      = PETSC_TRUE;
1763 
1764   c->bs         = a->bs;
1765   c->bs2        = a->bs2;
1766   c->mbs        = a->mbs;
1767   c->nbs        = a->nbs;
1768 
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1770   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1771   for (i=0; i<mbs; i++) {
1772     c->imax[i] = a->imax[i];
1773     c->ilen[i] = a->ilen[i];
1774   }
1775 
1776   /* allocate the matrix space */
1777   c->singlemalloc = PETSC_TRUE;
1778   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1779   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1780   c->j = (int*)(c->a + nz*bs2);
1781   c->i = c->j + nz;
1782   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1783   if (mbs > 0) {
1784     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1785     if (cpvalues == MAT_COPY_VALUES) {
1786       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1787     } else {
1788       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1789     }
1790   }
1791 
1792   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1793   c->sorted      = a->sorted;
1794   c->roworiented = a->roworiented;
1795   c->nonew       = a->nonew;
1796 
1797   if (a->diag) {
1798     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1799     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1800     for (i=0; i<mbs; i++) {
1801       c->diag[i] = a->diag[i];
1802     }
1803   } else c->diag        = 0;
1804   c->s_nz               = a->s_nz;
1805   c->s_maxnz            = a->s_maxnz;
1806   c->solve_work         = 0;
1807   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1808   c->mult_work          = 0;
1809   *B = C;
1810   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1816 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1817 {
1818   Mat_SeqSBAIJ *a;
1819   Mat          B;
1820   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1821   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1822   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1823   int          *masked,nmask,tmp,bs2,ishift;
1824   PetscScalar  *aa;
1825   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1829   bs2  = bs*bs;
1830 
1831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1832   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1833   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1834   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1835   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1836   M = header[1]; N = header[2]; nz = header[3];
1837 
1838   if (header[3] < 0) {
1839     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1840   }
1841 
1842   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1843 
1844   /*
1845      This code adds extra rows to make sure the number of rows is
1846     divisible by the blocksize
1847   */
1848   mbs        = M/bs;
1849   extra_rows = bs - M + bs*(mbs);
1850   if (extra_rows == bs) extra_rows = 0;
1851   else                  mbs++;
1852   if (extra_rows) {
1853     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1854   }
1855 
1856   /* read in row lengths */
1857   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1858   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1859   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1860 
1861   /* read in column indices */
1862   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1863   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1864   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1865 
1866   /* loop over row lengths determining block row lengths */
1867   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1868   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1869   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1870   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1871   masked   = mask + mbs;
1872   rowcount = 0; nzcount = 0;
1873   for (i=0; i<mbs; i++) {
1874     nmask = 0;
1875     for (j=0; j<bs; j++) {
1876       kmax = rowlengths[rowcount];
1877       for (k=0; k<kmax; k++) {
1878         tmp = jj[nzcount++]/bs;   /* block col. index */
1879         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1880       }
1881       rowcount++;
1882     }
1883     s_browlengths[i] += nmask;
1884 
1885     /* zero out the mask elements we set */
1886     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1887   }
1888 
1889   /* create our matrix */
1890   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1891   ierr = MatSetType(B,type);CHKERRQ(ierr);
1892   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1893   a = (Mat_SeqSBAIJ*)B->data;
1894 
1895   /* set matrix "i" values */
1896   a->i[0] = 0;
1897   for (i=1; i<= mbs; i++) {
1898     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1899     a->ilen[i-1] = s_browlengths[i-1];
1900   }
1901   a->s_nz = a->i[mbs];
1902 
1903   /* read in nonzero values */
1904   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1905   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1906   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1907 
1908   /* set "a" and "j" values into matrix */
1909   nzcount = 0; jcount = 0;
1910   for (i=0; i<mbs; i++) {
1911     nzcountb = nzcount;
1912     nmask    = 0;
1913     for (j=0; j<bs; j++) {
1914       kmax = rowlengths[i*bs+j];
1915       for (k=0; k<kmax; k++) {
1916         tmp = jj[nzcount++]/bs; /* block col. index */
1917         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1918       }
1919     }
1920     /* sort the masked values */
1921     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1922 
1923     /* set "j" values into matrix */
1924     maskcount = 1;
1925     for (j=0; j<nmask; j++) {
1926       a->j[jcount++]  = masked[j];
1927       mask[masked[j]] = maskcount++;
1928     }
1929 
1930     /* set "a" values into matrix */
1931     ishift = bs2*a->i[i];
1932     for (j=0; j<bs; j++) {
1933       kmax = rowlengths[i*bs+j];
1934       for (k=0; k<kmax; k++) {
1935         tmp       = jj[nzcountb]/bs ; /* block col. index */
1936         if (tmp >= i){
1937           block     = mask[tmp] - 1;
1938           point     = jj[nzcountb] - bs*tmp;
1939           idx       = ishift + bs2*block + j + bs*point;
1940           a->a[idx] = aa[nzcountb];
1941         }
1942         nzcountb++;
1943       }
1944     }
1945     /* zero out the mask elements we set */
1946     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1947   }
1948   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1949 
1950   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1952   ierr = PetscFree(aa);CHKERRQ(ierr);
1953   ierr = PetscFree(jj);CHKERRQ(ierr);
1954   ierr = PetscFree(mask);CHKERRQ(ierr);
1955 
1956   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatView_Private(B);CHKERRQ(ierr);
1959   *A = B;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1965 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1966 {
1967   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1968   MatScalar    *aa=a->a,*v,*v1;
1969   PetscScalar  *x,*b,*t,sum,d;
1970   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1971   int          nz,nz1,*vj,*vj1,i;
1972 
1973   PetscFunctionBegin;
1974   its = its*lits;
1975   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1976 
1977   if (bs > 1)
1978     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1979 
1980   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1981   if (xx != bb) {
1982     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1983   } else {
1984     b = x;
1985   }
1986 
1987   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1988 
1989   if (flag & SOR_ZERO_INITIAL_GUESS) {
1990     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1991       for (i=0; i<m; i++)
1992         t[i] = b[i];
1993 
1994       for (i=0; i<m; i++){
1995         d  = *(aa + ai[i]);  /* diag[i] */
1996         v  = aa + ai[i] + 1;
1997         vj = aj + ai[i] + 1;
1998         nz = ai[i+1] - ai[i] - 1;
1999         x[i] = omega*t[i]/d;
2000         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2001         PetscLogFlops(2*nz-1);
2002       }
2003     }
2004 
2005     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2006       for (i=0; i<m; i++)
2007         t[i] = b[i];
2008 
2009       for (i=0; i<m-1; i++){  /* update rhs */
2010         v  = aa + ai[i] + 1;
2011         vj = aj + ai[i] + 1;
2012         nz = ai[i+1] - ai[i] - 1;
2013         while (nz--) t[*vj++] -= x[i]*(*v++);
2014         PetscLogFlops(2*nz-1);
2015       }
2016       for (i=m-1; i>=0; i--){
2017         d  = *(aa + ai[i]);
2018         v  = aa + ai[i] + 1;
2019         vj = aj + ai[i] + 1;
2020         nz = ai[i+1] - ai[i] - 1;
2021         sum = t[i];
2022         while (nz--) sum -= x[*vj++]*(*v++);
2023         PetscLogFlops(2*nz-1);
2024         x[i] =   (1-omega)*x[i] + omega*sum/d;
2025       }
2026     }
2027     its--;
2028   }
2029 
2030   while (its--) {
2031     /*
2032        forward sweep:
2033        for i=0,...,m-1:
2034          sum[i] = (b[i] - U(i,:)x )/d[i];
2035          x[i]   = (1-omega)x[i] + omega*sum[i];
2036          b      = b - x[i]*U^T(i,:);
2037 
2038     */
2039     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2040       for (i=0; i<m; i++)
2041         t[i] = b[i];
2042 
2043       for (i=0; i<m; i++){
2044         d  = *(aa + ai[i]);  /* diag[i] */
2045         v  = aa + ai[i] + 1; v1=v;
2046         vj = aj + ai[i] + 1; vj1=vj;
2047         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2048         sum = t[i];
2049         while (nz1--) sum -= (*v1++)*x[*vj1++];
2050         x[i] = (1-omega)*x[i] + omega*sum/d;
2051         while (nz--) t[*vj++] -= x[i]*(*v++);
2052         PetscLogFlops(4*nz-2);
2053       }
2054     }
2055 
2056   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2057       /*
2058        backward sweep:
2059        b = b - x[i]*U^T(i,:), i=0,...,n-2
2060        for i=m-1,...,0:
2061          sum[i] = (b[i] - U(i,:)x )/d[i];
2062          x[i]   = (1-omega)x[i] + omega*sum[i];
2063       */
2064       for (i=0; i<m; i++)
2065         t[i] = b[i];
2066 
2067       for (i=0; i<m-1; i++){  /* update rhs */
2068         v  = aa + ai[i] + 1;
2069         vj = aj + ai[i] + 1;
2070         nz = ai[i+1] - ai[i] - 1;
2071         while (nz--) t[*vj++] -= x[i]*(*v++);
2072         PetscLogFlops(2*nz-1);
2073       }
2074       for (i=m-1; i>=0; i--){
2075         d  = *(aa + ai[i]);
2076         v  = aa + ai[i] + 1;
2077         vj = aj + ai[i] + 1;
2078         nz = ai[i+1] - ai[i] - 1;
2079         sum = t[i];
2080         while (nz--) sum -= x[*vj++]*(*v++);
2081         PetscLogFlops(2*nz-1);
2082         x[i] =   (1-omega)*x[i] + omega*sum/d;
2083       }
2084     }
2085   }
2086 
2087   ierr = PetscFree(t); CHKERRQ(ierr);
2088   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2089   if (bb != xx) {
2090     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2091   }
2092 
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 
2097 
2098 
2099 
2100 
2101