xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 63b9fa5e258ef37d5bcff491ebacde33f30ed49b)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
560     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
565       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
595 {
596   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
597   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth  roworiented=a->roworiented;
601   MatScalar   *value = v,*ap,*aa = a->a,*bap;
602 
603   PetscFunctionBegin;
604   if (roworiented) {
605     stepval = (n-1)*bs;
606   } else {
607     stepval = (m-1)*bs;
608   }
609   for (k=0; k<m; k++) { /* loop over added rows */
610     row  = im[k];
611     if (row < 0) continue;
612 #if defined(PETSC_USE_BOPT_g)
613     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
614 #endif
615     rp   = aj + ai[row];
616     ap   = aa + bs2*ai[row];
617     rmax = imax[row];
618     nrow = ailen[row];
619     low  = 0;
620     for (l=0; l<n; l++) { /* loop over added columns */
621       if (in[l] < 0) continue;
622 #if defined(PETSC_USE_BOPT_g)
623       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
624 #endif
625       col = in[l];
626       if (col < row) continue; /* ignore lower triangular block */
627       if (roworiented) {
628         value = v + k*(stepval+bs)*bs + l*bs;
629       } else {
630         value = v + l*(stepval+bs)*bs + k*bs;
631       }
632       if (!sorted) low = 0; high = nrow;
633       while (high-low > 7) {
634         t = (low+high)/2;
635         if (rp[t] > col) high = t;
636         else             low  = t;
637       }
638       for (i=low; i<high; i++) {
639         if (rp[i] > col) break;
640         if (rp[i] == col) {
641           bap  = ap +  bs2*i;
642           if (roworiented) {
643             if (is == ADD_VALUES) {
644               for (ii=0; ii<bs; ii++,value+=stepval) {
645                 for (jj=ii; jj<bs2; jj+=bs) {
646                   bap[jj] += *value++;
647                 }
648               }
649             } else {
650               for (ii=0; ii<bs; ii++,value+=stepval) {
651                 for (jj=ii; jj<bs2; jj+=bs) {
652                   bap[jj] = *value++;
653                 }
654               }
655             }
656           } else {
657             if (is == ADD_VALUES) {
658               for (ii=0; ii<bs; ii++,value+=stepval) {
659                 for (jj=0; jj<bs; jj++) {
660                   *bap++ += *value++;
661                 }
662               }
663             } else {
664               for (ii=0; ii<bs; ii++,value+=stepval) {
665                 for (jj=0; jj<bs; jj++) {
666                   *bap++  = *value++;
667                 }
668               }
669             }
670           }
671           goto noinsert2;
672         }
673       }
674       if (nonew == 1) goto noinsert2;
675       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
676       if (nrow >= rmax) {
677         /* there is no extra room in row, therefore enlarge */
678         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
679         MatScalar *new_a;
680 
681         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
682 
683         /* malloc new storage space */
684         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
685 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
686         new_j   = (int*)(new_a + bs2*new_nz);
687         new_i   = new_j + new_nz;
688 
689         /* copy over old data into new slots */
690         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
691         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
692         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
693         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
694         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
695         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
696         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
698         /* free up old matrix storage */
699         ierr = PetscFree(a->a);CHKERRQ(ierr);
700         if (!a->singlemalloc) {
701           ierr = PetscFree(a->i);CHKERRQ(ierr);
702           ierr = PetscFree(a->j);CHKERRQ(ierr);
703         }
704         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
705         a->singlemalloc = PETSC_TRUE;
706 
707         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
708         rmax = imax[row] = imax[row] + CHUNKSIZE;
709         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
710         a->s_maxnz += bs2*CHUNKSIZE;
711         a->reallocs++;
712         a->s_nz++;
713       }
714       N = nrow++ - 1;
715       /* shift up all the later entries in this row */
716       for (ii=N; ii>=i; ii--) {
717         rp[ii+1] = rp[ii];
718         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
719       }
720       if (N >= i) {
721         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
722       }
723       rp[i] = col;
724       bap   = ap +  bs2*i;
725       if (roworiented) {
726         for (ii=0; ii<bs; ii++,value+=stepval) {
727           for (jj=ii; jj<bs2; jj+=bs) {
728             bap[jj] = *value++;
729           }
730         }
731       } else {
732         for (ii=0; ii<bs; ii++,value+=stepval) {
733           for (jj=0; jj<bs; jj++) {
734             *bap++  = *value++;
735           }
736         }
737       }
738       noinsert2:;
739       low = i;
740     }
741     ailen[row] = nrow;
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
749 {
750   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
751   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
752   int        m = A->m,*ip,N,*ailen = a->ilen;
753   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
754   MatScalar  *aa = a->a,*ap;
755 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
756   PetscTruth   flag;
757 #endif
758 
759   PetscFunctionBegin;
760   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
761 
762   if (m) rmax = ailen[0];
763   for (i=1; i<mbs; i++) {
764     /* move each row back by the amount of empty slots (fshift) before it*/
765     fshift += imax[i-1] - ailen[i-1];
766     rmax   = PetscMax(rmax,ailen[i]);
767     if (fshift) {
768       ip = aj + ai[i]; ap = aa + bs2*ai[i];
769       N = ailen[i];
770       for (j=0; j<N; j++) {
771         ip[j-fshift] = ip[j];
772         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
773       }
774     }
775     ai[i] = ai[i-1] + ailen[i-1];
776   }
777   if (mbs) {
778     fshift += imax[mbs-1] - ailen[mbs-1];
779     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
780   }
781   /* reset ilen and imax for each row */
782   for (i=0; i<mbs; i++) {
783     ailen[i] = imax[i] = ai[i+1] - ai[i];
784   }
785   a->s_nz = ai[mbs];
786 
787   /* diagonals may have moved, reset it */
788   if (a->diag) {
789     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
790   }
791   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
792            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
793   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
794            a->reallocs);
795   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
796   a->reallocs          = 0;
797   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
798 
799 #if defined(PETSC_HAVE_SPOOLES)
800   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
801   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
802 #endif
803 #if defined(PETSC_HAVE_MUMPS)
804   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
805   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
806 #endif
807 
808   PetscFunctionReturn(0);
809 }
810 
811 /*
812    This function returns an array of flags which indicate the locations of contiguous
813    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
814    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
815    Assume: sizes should be long enough to hold all the values.
816 */
817 #undef __FUNCT__
818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
819 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
820 {
821   int        i,j,k,row;
822   PetscTruth flg;
823 
824   PetscFunctionBegin;
825   for (i=0,j=0; i<n; j++) {
826     row = idx[i];
827     if (row%bs!=0) { /* Not the begining of a block */
828       sizes[j] = 1;
829       i++;
830     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
831       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
832       i++;
833     } else { /* Begining of the block, so check if the complete block exists */
834       flg = PETSC_TRUE;
835       for (k=1; k<bs; k++) {
836         if (row+k != idx[i+k]) { /* break in the block */
837           flg = PETSC_FALSE;
838           break;
839         }
840       }
841       if (flg == PETSC_TRUE) { /* No break in the bs */
842         sizes[j] = bs;
843         i+= bs;
844       } else {
845         sizes[j] = 1;
846         i++;
847       }
848     }
849   }
850   *bs_max = j;
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
857 {
858   PetscFunctionBegin;
859   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
860 }
861 
862 /* Only add/insert a(i,j) with i<=j (blocks).
863    Any a(i,j) with i>j input by user is ingored.
864 */
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
868 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
869 {
870   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
871   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
872   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
873   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
874   int         ridx,cidx,bs2=a->bs2,ierr;
875   MatScalar   *ap,value,*aa=a->a,*bap;
876 
877   PetscFunctionBegin;
878 
879   for (k=0; k<m; k++) { /* loop over added rows */
880     row  = im[k];       /* row number */
881     brow = row/bs;      /* block row number */
882     if (row < 0) continue;
883 #if defined(PETSC_USE_BOPT_g)
884     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
885 #endif
886     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
887     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
888     rmax = imax[brow];  /* maximum space allocated for this row */
889     nrow = ailen[brow]; /* actual length of this row */
890     low  = 0;
891 
892     for (l=0; l<n; l++) { /* loop over added columns */
893       if (in[l] < 0) continue;
894 #if defined(PETSC_USE_BOPT_g)
895       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
896 #endif
897       col = in[l];
898       bcol = col/bs;              /* block col number */
899 
900       if (brow <= bcol){
901         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
902         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
903           /* element value a(k,l) */
904           if (roworiented) {
905             value = v[l + k*n];
906           } else {
907             value = v[k + l*m];
908           }
909 
910           /* move pointer bap to a(k,l) quickly and add/insert value */
911           if (!sorted) low = 0; high = nrow;
912           while (high-low > 7) {
913             t = (low+high)/2;
914             if (rp[t] > bcol) high = t;
915             else              low  = t;
916           }
917           for (i=low; i<high; i++) {
918             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
919             if (rp[i] > bcol) break;
920             if (rp[i] == bcol) {
921               bap  = ap +  bs2*i + bs*cidx + ridx;
922               if (is == ADD_VALUES) *bap += value;
923               else                  *bap  = value;
924               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
925               if (brow == bcol && ridx < cidx){
926                 bap  = ap +  bs2*i + bs*ridx + cidx;
927                 if (is == ADD_VALUES) *bap += value;
928                 else                  *bap  = value;
929               }
930               goto noinsert1;
931             }
932           }
933 
934       if (nonew == 1) goto noinsert1;
935       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
936       if (nrow >= rmax) {
937         /* there is no extra room in row, therefore enlarge */
938         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
939         MatScalar *new_a;
940 
941         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
942 
943         /* Malloc new storage space */
944         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
945         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
946         new_j = (int*)(new_a + bs2*new_nz);
947         new_i = new_j + new_nz;
948 
949         /* copy over old data into new slots */
950         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
951         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
952         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
953         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
954         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
955         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
956         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
957         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
958         /* free up old matrix storage */
959         ierr = PetscFree(a->a);CHKERRQ(ierr);
960         if (!a->singlemalloc) {
961           ierr = PetscFree(a->i);CHKERRQ(ierr);
962           ierr = PetscFree(a->j);CHKERRQ(ierr);
963         }
964         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
965         a->singlemalloc = PETSC_TRUE;
966 
967         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
968         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
969         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
970         a->s_maxnz += bs2*CHUNKSIZE;
971         a->reallocs++;
972         a->s_nz++;
973       }
974 
975       N = nrow++ - 1;
976       /* shift up all the later entries in this row */
977       for (ii=N; ii>=i; ii--) {
978         rp[ii+1] = rp[ii];
979         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
980       }
981       if (N>=i) {
982         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
983       }
984       rp[i]                      = bcol;
985       ap[bs2*i + bs*cidx + ridx] = value;
986       noinsert1:;
987       low = i;
988       /* } */
989         }
990       } /* end of if .. if..  */
991     }                     /* end of loop over added columns */
992     ailen[brow] = nrow;
993   }                       /* end of loop over added rows */
994 
995   PetscFunctionReturn(0);
996 }
997 
998 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
999 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
1000 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
1001 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1002 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1003 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1004 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1005 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1006 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1007 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1008 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1009 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1010 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1011 extern int MatZeroEntries_SeqSBAIJ(Mat);
1012 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1013 
1014 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1021 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1028 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1029 
1030 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1031 
1032 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1038 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1039 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1040 
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1048 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1049 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1050 
1051 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1056 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1057 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1058 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1059 
1060 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1065 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1066 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1067 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1068 
1069 #ifdef HAVE_MatICCFactor
1070 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1073 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1074 {
1075   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1076   Mat         outA;
1077   int         ierr;
1078   PetscTruth  row_identity,col_identity;
1079 
1080   PetscFunctionBegin;
1081   /*
1082   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1083   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1084   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1085   if (!row_identity || !col_identity) {
1086     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1087   }
1088   */
1089 
1090   outA          = inA;
1091   inA->factor   = FACTOR_CHOLESKY;
1092 
1093   if (!a->diag) {
1094     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1095   }
1096   /*
1097       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1098       for ILU(0) factorization with natural ordering
1099   */
1100   switch (a->bs) {
1101   case 1:
1102     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1103     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1104     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1105     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1106   case 2:
1107     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1108     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1109     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1110     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1111     break;
1112   case 3:
1113     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1114     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1115     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1116     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1117     break;
1118   case 4:
1119     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1120     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1121     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1122     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1123     break;
1124   case 5:
1125     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1126     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1127     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1128     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1129     break;
1130   case 6:
1131     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1132     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1133     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1134     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1135     break;
1136   case 7:
1137     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1138     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1139     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1140     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1141     break;
1142   default:
1143     a->row        = row;
1144     a->icol       = col;
1145     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1146     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1147 
1148     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1149     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1150     PetscLogObjectParent(inA,a->icol);
1151 
1152     if (!a->solve_work) {
1153       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1154       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1155     }
1156   }
1157 
1158   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1159 
1160   PetscFunctionReturn(0);
1161 }
1162 #endif
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1166 int MatPrintHelp_SeqSBAIJ(Mat A)
1167 {
1168   static PetscTruth called = PETSC_FALSE;
1169   MPI_Comm          comm = A->comm;
1170   int               ierr;
1171 
1172   PetscFunctionBegin;
1173   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1174   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1175   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 EXTERN_C_BEGIN
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1182 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1183 {
1184   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1185   int         i,nz,n;
1186 
1187   PetscFunctionBegin;
1188   nz = baij->s_maxnz;
1189   n  = mat->n;
1190   for (i=0; i<nz; i++) {
1191     baij->j[i] = indices[i];
1192   }
1193   baij->s_nz = nz;
1194   for (i=0; i<n; i++) {
1195     baij->ilen[i] = baij->imax[i];
1196   }
1197 
1198   PetscFunctionReturn(0);
1199 }
1200 EXTERN_C_END
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1204 /*@
1205     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1206        in the matrix.
1207 
1208   Input Parameters:
1209 +  mat     - the SeqSBAIJ matrix
1210 -  indices - the column indices
1211 
1212   Level: advanced
1213 
1214   Notes:
1215     This can be called if you have precomputed the nonzero structure of the
1216   matrix and want to provide it to the matrix object to improve the performance
1217   of the MatSetValues() operation.
1218 
1219     You MUST have set the correct numbers of nonzeros per row in the call to
1220   MatCreateSeqSBAIJ().
1221 
1222     MUST be called before any calls to MatSetValues()
1223 
1224 .seealso: MatCreateSeqSBAIJ
1225 @*/
1226 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1227 {
1228   int ierr,(*f)(Mat,int *);
1229 
1230   PetscFunctionBegin;
1231   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1232   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1233   if (f) {
1234     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1235   } else {
1236     SETERRQ(1,"Wrong type of matrix to set column indices");
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1243 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1244 {
1245   int        ierr;
1246 
1247   PetscFunctionBegin;
1248   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1254 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array)
1255 {
1256   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1257   PetscFunctionBegin;
1258   *array = a->a;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1264 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array)
1265 {
1266   PetscFunctionBegin;
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #include "petscblaslapack.h"
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1273 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1274 {
1275   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1276   int          ierr,one=1,i,bs=y->bs,bs2,j;
1277 
1278   PetscFunctionBegin;
1279   if (str == SAME_NONZERO_PATTERN) {
1280     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1281   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1282     if (y->xtoy && y->XtoY != X) {
1283       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1284       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1285     }
1286     if (!y->xtoy) { /* get xtoy */
1287       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1288       y->XtoY = X;
1289     }
1290     bs2 = bs*bs;
1291     for (i=0; i<x->s_nz; i++) {
1292       j = 0;
1293       while (j < bs2){
1294         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1295         j++;
1296       }
1297     }
1298     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1299   } else {
1300     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1301   }
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /* -------------------------------------------------------------------*/
1306 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1307        MatGetRow_SeqSBAIJ,
1308        MatRestoreRow_SeqSBAIJ,
1309        MatMult_SeqSBAIJ_N,
1310        MatMultAdd_SeqSBAIJ_N,
1311        MatMultTranspose_SeqSBAIJ,
1312        MatMultTransposeAdd_SeqSBAIJ,
1313        MatSolve_SeqSBAIJ_N,
1314        0,
1315        0,
1316        0,
1317        0,
1318        MatCholeskyFactor_SeqSBAIJ,
1319        MatRelax_SeqSBAIJ,
1320        MatTranspose_SeqSBAIJ,
1321        MatGetInfo_SeqSBAIJ,
1322        MatEqual_SeqSBAIJ,
1323        MatGetDiagonal_SeqSBAIJ,
1324        MatDiagonalScale_SeqSBAIJ,
1325        MatNorm_SeqSBAIJ,
1326        0,
1327        MatAssemblyEnd_SeqSBAIJ,
1328        0,
1329        MatSetOption_SeqSBAIJ,
1330        MatZeroEntries_SeqSBAIJ,
1331        MatZeroRows_SeqSBAIJ,
1332        0,
1333        0,
1334        MatCholeskyFactorSymbolic_SeqSBAIJ,
1335        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1336        MatSetUpPreallocation_SeqSBAIJ,
1337        0,
1338        MatICCFactorSymbolic_SeqSBAIJ,
1339        MatGetArray_SeqSBAIJ,
1340        MatRestoreArray_SeqSBAIJ,
1341        MatDuplicate_SeqSBAIJ,
1342        0,
1343        0,
1344        0,
1345        0,
1346        MatAXPY_SeqSBAIJ,
1347        MatGetSubMatrices_SeqSBAIJ,
1348        MatIncreaseOverlap_SeqSBAIJ,
1349        MatGetValues_SeqSBAIJ,
1350        0,
1351        MatPrintHelp_SeqSBAIJ,
1352        MatScale_SeqSBAIJ,
1353        0,
1354        0,
1355        0,
1356        MatGetBlockSize_SeqSBAIJ,
1357        MatGetRowIJ_SeqSBAIJ,
1358        MatRestoreRowIJ_SeqSBAIJ,
1359        0,
1360        0,
1361        0,
1362        0,
1363        0,
1364        0,
1365        MatSetValuesBlocked_SeqSBAIJ,
1366        MatGetSubMatrix_SeqSBAIJ,
1367        0,
1368        0,
1369        MatGetPetscMaps_Petsc,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        MatGetRowMax_SeqSBAIJ,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390 #if !defined(PETSC_USE_COMPLEX)
1391        MatGetInertia_SeqSBAIJ
1392 #else
1393        0
1394 #endif
1395 };
1396 
1397 EXTERN_C_BEGIN
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1400 int MatStoreValues_SeqSBAIJ(Mat mat)
1401 {
1402   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1403   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1404   int         ierr;
1405 
1406   PetscFunctionBegin;
1407   if (aij->nonew != 1) {
1408     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1409   }
1410 
1411   /* allocate space for values if not already there */
1412   if (!aij->saved_values) {
1413     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1414   }
1415 
1416   /* copy values over */
1417   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 EXTERN_C_END
1421 
1422 EXTERN_C_BEGIN
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1425 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1426 {
1427   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1428   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1429 
1430   PetscFunctionBegin;
1431   if (aij->nonew != 1) {
1432     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1433   }
1434   if (!aij->saved_values) {
1435     SETERRQ(1,"Must call MatStoreValues(A);first");
1436   }
1437 
1438   /* copy values over */
1439   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 EXTERN_C_END
1443 
1444 EXTERN_C_BEGIN
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1447 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1448 {
1449   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1450   int          i,len,ierr,mbs,bs2;
1451   PetscTruth   flg;
1452   int          s_nz;
1453 
1454   PetscFunctionBegin;
1455   B->preallocated = PETSC_TRUE;
1456   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1457   mbs  = B->m/bs;
1458   bs2  = bs*bs;
1459 
1460   if (mbs*bs != B->m) {
1461     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1462   }
1463 
1464   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1465   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1466   if (nnz) {
1467     for (i=0; i<mbs; i++) {
1468       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1469       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1470     }
1471   }
1472 
1473   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1474   if (!flg) {
1475     switch (bs) {
1476     case 1:
1477       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1478       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1479       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1480       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1481       B->ops->mult            = MatMult_SeqSBAIJ_1;
1482       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1483       break;
1484     case 2:
1485       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1486       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1487       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1488       B->ops->mult            = MatMult_SeqSBAIJ_2;
1489       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1490       break;
1491     case 3:
1492       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1493       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1494       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1495       B->ops->mult            = MatMult_SeqSBAIJ_3;
1496       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1497       break;
1498     case 4:
1499       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1500       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1501       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1502       B->ops->mult            = MatMult_SeqSBAIJ_4;
1503       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1504       break;
1505     case 5:
1506       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1507       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1508       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1509       B->ops->mult            = MatMult_SeqSBAIJ_5;
1510       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1511       break;
1512     case 6:
1513       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1514       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1515       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1516       B->ops->mult            = MatMult_SeqSBAIJ_6;
1517       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1518       break;
1519     case 7:
1520       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1521       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1522       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1523       B->ops->mult            = MatMult_SeqSBAIJ_7;
1524       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1525       break;
1526     }
1527   }
1528 
1529   b->mbs = mbs;
1530   b->nbs = mbs;
1531   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1532   if (!nnz) {
1533     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1534     else if (nz <= 0)        nz = 1;
1535     for (i=0; i<mbs; i++) {
1536       b->imax[i] = nz;
1537     }
1538     nz = nz*mbs; /* total nz */
1539   } else {
1540     nz = 0;
1541     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1542   }
1543   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1544   s_nz = nz;
1545 
1546   /* allocate the matrix space */
1547   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1548   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1549   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1550   b->j = (int*)(b->a + s_nz*bs2);
1551   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1552   b->i = b->j + s_nz;
1553   b->singlemalloc = PETSC_TRUE;
1554 
1555   /* pointer to beginning of each row */
1556   b->i[0] = 0;
1557   for (i=1; i<mbs+1; i++) {
1558     b->i[i] = b->i[i-1] + b->imax[i-1];
1559   }
1560 
1561   /* b->ilen will count nonzeros in each block row so far. */
1562   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1563   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1564   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1565 
1566   b->bs               = bs;
1567   b->bs2              = bs2;
1568   b->s_nz             = 0;
1569   b->s_maxnz          = s_nz*bs2;
1570 
1571   b->inew             = 0;
1572   b->jnew             = 0;
1573   b->anew             = 0;
1574   b->a2anew           = 0;
1575   b->permute          = PETSC_FALSE;
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1583 int MatCreate_SeqSBAIJ(Mat B)
1584 {
1585   Mat_SeqSBAIJ *b;
1586   int          ierr,size;
1587 
1588   PetscFunctionBegin;
1589   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1590   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1591   B->m = B->M = PetscMax(B->m,B->M);
1592   B->n = B->N = PetscMax(B->n,B->N);
1593 
1594   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1595   B->data = (void*)b;
1596   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1597   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1598   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1599   B->ops->view        = MatView_SeqSBAIJ;
1600   B->factor           = 0;
1601   B->lupivotthreshold = 1.0;
1602   B->mapping          = 0;
1603   b->row              = 0;
1604   b->icol             = 0;
1605   b->reallocs         = 0;
1606   b->saved_values     = 0;
1607 
1608   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1609   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1610 
1611   b->sorted           = PETSC_FALSE;
1612   b->roworiented      = PETSC_TRUE;
1613   b->nonew            = 0;
1614   b->diag             = 0;
1615   b->solve_work       = 0;
1616   b->mult_work        = 0;
1617   B->spptr            = 0;
1618   b->keepzeroedrows   = PETSC_FALSE;
1619   b->xtoy             = 0;
1620   b->XtoY             = 0;
1621 
1622   b->inew             = 0;
1623   b->jnew             = 0;
1624   b->anew             = 0;
1625   b->a2anew           = 0;
1626   b->permute          = PETSC_FALSE;
1627 
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1629                                      "MatStoreValues_SeqSBAIJ",
1630                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1632                                      "MatRetrieveValues_SeqSBAIJ",
1633                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1635                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1636                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1638                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1639                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 EXTERN_C_END
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1646 /*@C
1647    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1648    compressed row) format.  For good matrix assembly performance the
1649    user should preallocate the matrix storage by setting the parameter nz
1650    (or the array nnz).  By setting these parameters accurately, performance
1651    during matrix assembly can be increased by more than a factor of 50.
1652 
1653    Collective on Mat
1654 
1655    Input Parameters:
1656 +  A - the symmetric matrix
1657 .  bs - size of block
1658 .  nz - number of block nonzeros per block row (same for all rows)
1659 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1660          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1661 
1662    Options Database Keys:
1663 .   -mat_no_unroll - uses code that does not unroll the loops in the
1664                      block calculations (much slower)
1665 .    -mat_block_size - size of the blocks to use
1666 
1667    Level: intermediate
1668 
1669    Notes:
1670    Specify the preallocated storage with either nz or nnz (not both).
1671    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1672    allocation.  For additional details, see the users manual chapter on
1673    matrices.
1674 
1675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1676 @*/
1677 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) {
1678   int ierr,(*f)(Mat,int,int,int *);
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1682   if (f) {
1683     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatCreateSeqSBAIJ"
1690 /*@C
1691    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1692    compressed row) format.  For good matrix assembly performance the
1693    user should preallocate the matrix storage by setting the parameter nz
1694    (or the array nnz).  By setting these parameters accurately, performance
1695    during matrix assembly can be increased by more than a factor of 50.
1696 
1697    Collective on MPI_Comm
1698 
1699    Input Parameters:
1700 +  comm - MPI communicator, set to PETSC_COMM_SELF
1701 .  bs - size of block
1702 .  m - number of rows, or number of columns
1703 .  nz - number of block nonzeros per block row (same for all rows)
1704 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1705          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1706 
1707    Output Parameter:
1708 .  A - the symmetric matrix
1709 
1710    Options Database Keys:
1711 .   -mat_no_unroll - uses code that does not unroll the loops in the
1712                      block calculations (much slower)
1713 .    -mat_block_size - size of the blocks to use
1714 
1715    Level: intermediate
1716 
1717    Notes:
1718 
1719    Specify the preallocated storage with either nz or nnz (not both).
1720    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1721    allocation.  For additional details, see the users manual chapter on
1722    matrices.
1723 
1724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1725 @*/
1726 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1727 {
1728   int ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1732   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1733   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1739 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1740 {
1741   Mat         C;
1742   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1743   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1744 
1745   PetscFunctionBegin;
1746   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1747 
1748   *B = 0;
1749   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1750   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1751   c    = (Mat_SeqSBAIJ*)C->data;
1752 
1753   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1754   C->preallocated   = PETSC_TRUE;
1755   C->factor         = A->factor;
1756   c->row            = 0;
1757   c->icol           = 0;
1758   c->saved_values   = 0;
1759   c->keepzeroedrows = a->keepzeroedrows;
1760   C->assembled      = PETSC_TRUE;
1761 
1762   c->bs         = a->bs;
1763   c->bs2        = a->bs2;
1764   c->mbs        = a->mbs;
1765   c->nbs        = a->nbs;
1766 
1767   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1768   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1769   for (i=0; i<mbs; i++) {
1770     c->imax[i] = a->imax[i];
1771     c->ilen[i] = a->ilen[i];
1772   }
1773 
1774   /* allocate the matrix space */
1775   c->singlemalloc = PETSC_TRUE;
1776   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1777   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1778   c->j = (int*)(c->a + nz*bs2);
1779   c->i = c->j + nz;
1780   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1781   if (mbs > 0) {
1782     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1783     if (cpvalues == MAT_COPY_VALUES) {
1784       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1785     } else {
1786       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1787     }
1788   }
1789 
1790   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1791   c->sorted      = a->sorted;
1792   c->roworiented = a->roworiented;
1793   c->nonew       = a->nonew;
1794 
1795   if (a->diag) {
1796     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1797     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1798     for (i=0; i<mbs; i++) {
1799       c->diag[i] = a->diag[i];
1800     }
1801   } else c->diag        = 0;
1802   c->s_nz               = a->s_nz;
1803   c->s_maxnz            = a->s_maxnz;
1804   c->solve_work         = 0;
1805   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1806   c->mult_work          = 0;
1807   *B = C;
1808   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1815 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1816 {
1817   Mat_SeqSBAIJ *a;
1818   Mat          B;
1819   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1820   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1821   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1822   int          *masked,nmask,tmp,bs2,ishift;
1823   PetscScalar  *aa;
1824   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1825 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
1826   PetscTruth   flag;
1827 #endif
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1831   bs2  = bs*bs;
1832 
1833   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1834   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1835   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1836   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1837   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1838   M = header[1]; N = header[2]; nz = header[3];
1839 
1840   if (header[3] < 0) {
1841     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1842   }
1843 
1844   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1845 
1846   /*
1847      This code adds extra rows to make sure the number of rows is
1848     divisible by the blocksize
1849   */
1850   mbs        = M/bs;
1851   extra_rows = bs - M + bs*(mbs);
1852   if (extra_rows == bs) extra_rows = 0;
1853   else                  mbs++;
1854   if (extra_rows) {
1855     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1856   }
1857 
1858   /* read in row lengths */
1859   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1860   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1861   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1862 
1863   /* read in column indices */
1864   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1865   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1866   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1867 
1868   /* loop over row lengths determining block row lengths */
1869   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1870   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1871   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1872   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1873   masked   = mask + mbs;
1874   rowcount = 0; nzcount = 0;
1875   for (i=0; i<mbs; i++) {
1876     nmask = 0;
1877     for (j=0; j<bs; j++) {
1878       kmax = rowlengths[rowcount];
1879       for (k=0; k<kmax; k++) {
1880         tmp = jj[nzcount++]/bs;   /* block col. index */
1881         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1882       }
1883       rowcount++;
1884     }
1885     s_browlengths[i] += nmask;
1886 
1887     /* zero out the mask elements we set */
1888     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1889   }
1890 
1891   /* create our matrix */
1892   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1893   B = *A;
1894   a = (Mat_SeqSBAIJ*)B->data;
1895 
1896   /* set matrix "i" values */
1897   a->i[0] = 0;
1898   for (i=1; i<= mbs; i++) {
1899     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1900     a->ilen[i-1] = s_browlengths[i-1];
1901   }
1902   a->s_nz = a->i[mbs];
1903 
1904   /* read in nonzero values */
1905   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1906   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1907   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1908 
1909   /* set "a" and "j" values into matrix */
1910   nzcount = 0; jcount = 0;
1911   for (i=0; i<mbs; i++) {
1912     nzcountb = nzcount;
1913     nmask    = 0;
1914     for (j=0; j<bs; j++) {
1915       kmax = rowlengths[i*bs+j];
1916       for (k=0; k<kmax; k++) {
1917         tmp = jj[nzcount++]/bs; /* block col. index */
1918         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1919       }
1920     }
1921     /* sort the masked values */
1922     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1923 
1924     /* set "j" values into matrix */
1925     maskcount = 1;
1926     for (j=0; j<nmask; j++) {
1927       a->j[jcount++]  = masked[j];
1928       mask[masked[j]] = maskcount++;
1929     }
1930 
1931     /* set "a" values into matrix */
1932     ishift = bs2*a->i[i];
1933     for (j=0; j<bs; j++) {
1934       kmax = rowlengths[i*bs+j];
1935       for (k=0; k<kmax; k++) {
1936         tmp       = jj[nzcountb]/bs ; /* block col. index */
1937         if (tmp >= i){
1938           block     = mask[tmp] - 1;
1939           point     = jj[nzcountb] - bs*tmp;
1940           idx       = ishift + bs2*block + j + bs*point;
1941           a->a[idx] = aa[nzcountb];
1942         }
1943         nzcountb++;
1944       }
1945     }
1946     /* zero out the mask elements we set */
1947     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1948   }
1949   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1950 
1951   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1952   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1953   ierr = PetscFree(aa);CHKERRQ(ierr);
1954   ierr = PetscFree(jj);CHKERRQ(ierr);
1955   ierr = PetscFree(mask);CHKERRQ(ierr);
1956 
1957   B->assembled = PETSC_TRUE;
1958 #if defined(PETSC_HAVE_SPOOLES)
1959   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
1960   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); }
1961 #endif
1962 #if defined(PETSC_HAVE_MUMPS)
1963   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
1964   if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); }
1965 #endif
1966   ierr = MatView_Private(B);CHKERRQ(ierr);
1967   PetscFunctionReturn(0);
1968 }
1969 EXTERN_C_END
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1973 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1974 {
1975   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1976   MatScalar    *aa=a->a,*v,*v1;
1977   PetscScalar  *x,*b,*t,sum,d;
1978   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1979   int          nz,nz1,*vj,*vj1,i;
1980 
1981   PetscFunctionBegin;
1982   its = its*lits;
1983   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1984 
1985   if (bs > 1)
1986     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1987 
1988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1989   if (xx != bb) {
1990     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1991   } else {
1992     b = x;
1993   }
1994 
1995   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1996 
1997   if (flag & SOR_ZERO_INITIAL_GUESS) {
1998     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1999       for (i=0; i<m; i++)
2000         t[i] = b[i];
2001 
2002       for (i=0; i<m; i++){
2003         d  = *(aa + ai[i]);  /* diag[i] */
2004         v  = aa + ai[i] + 1;
2005         vj = aj + ai[i] + 1;
2006         nz = ai[i+1] - ai[i] - 1;
2007         x[i] = omega*t[i]/d;
2008         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2009         PetscLogFlops(2*nz-1);
2010       }
2011     }
2012 
2013     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2014       for (i=0; i<m; i++)
2015         t[i] = b[i];
2016 
2017       for (i=0; i<m-1; i++){  /* update rhs */
2018         v  = aa + ai[i] + 1;
2019         vj = aj + ai[i] + 1;
2020         nz = ai[i+1] - ai[i] - 1;
2021         while (nz--) t[*vj++] -= x[i]*(*v++);
2022         PetscLogFlops(2*nz-1);
2023       }
2024       for (i=m-1; i>=0; i--){
2025         d  = *(aa + ai[i]);
2026         v  = aa + ai[i] + 1;
2027         vj = aj + ai[i] + 1;
2028         nz = ai[i+1] - ai[i] - 1;
2029         sum = t[i];
2030         while (nz--) sum -= x[*vj++]*(*v++);
2031         PetscLogFlops(2*nz-1);
2032         x[i] =   (1-omega)*x[i] + omega*sum/d;
2033       }
2034     }
2035     its--;
2036   }
2037 
2038   while (its--) {
2039     /*
2040        forward sweep:
2041        for i=0,...,m-1:
2042          sum[i] = (b[i] - U(i,:)x )/d[i];
2043          x[i]   = (1-omega)x[i] + omega*sum[i];
2044          b      = b - x[i]*U^T(i,:);
2045 
2046     */
2047     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2048       for (i=0; i<m; i++)
2049         t[i] = b[i];
2050 
2051       for (i=0; i<m; i++){
2052         d  = *(aa + ai[i]);  /* diag[i] */
2053         v  = aa + ai[i] + 1; v1=v;
2054         vj = aj + ai[i] + 1; vj1=vj;
2055         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2056         sum = t[i];
2057         while (nz1--) sum -= (*v1++)*x[*vj1++];
2058         x[i] = (1-omega)*x[i] + omega*sum/d;
2059         while (nz--) t[*vj++] -= x[i]*(*v++);
2060         PetscLogFlops(4*nz-2);
2061       }
2062     }
2063 
2064   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2065       /*
2066        backward sweep:
2067        b = b - x[i]*U^T(i,:), i=0,...,n-2
2068        for i=m-1,...,0:
2069          sum[i] = (b[i] - U(i,:)x )/d[i];
2070          x[i]   = (1-omega)x[i] + omega*sum[i];
2071       */
2072       for (i=0; i<m; i++)
2073         t[i] = b[i];
2074 
2075       for (i=0; i<m-1; i++){  /* update rhs */
2076         v  = aa + ai[i] + 1;
2077         vj = aj + ai[i] + 1;
2078         nz = ai[i+1] - ai[i] - 1;
2079         while (nz--) t[*vj++] -= x[i]*(*v++);
2080         PetscLogFlops(2*nz-1);
2081       }
2082       for (i=m-1; i>=0; i--){
2083         d  = *(aa + ai[i]);
2084         v  = aa + ai[i] + 1;
2085         vj = aj + ai[i] + 1;
2086         nz = ai[i+1] - ai[i] - 1;
2087         sum = t[i];
2088         while (nz--) sum -= x[*vj++]*(*v++);
2089         PetscLogFlops(2*nz-1);
2090         x[i] =   (1-omega)*x[i] + omega*sum/d;
2091       }
2092     }
2093   }
2094 
2095   ierr = PetscFree(t); CHKERRQ(ierr);
2096   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2097   if (bb != xx) {
2098     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2099   }
2100 
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 
2105 
2106 
2107 
2108 
2109