xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 6c6c5352cdaa6aa3a8bd2967d4dd73a4e9ba1251)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
294 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
297   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
298   PetscScalar  *aa;
299   FILE         *file;
300 
301   PetscFunctionBegin;
302   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
303   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
304   col_lens[0] = MAT_FILE_COOKIE;
305 
306   col_lens[1] = A->m;
307   col_lens[2] = A->m;
308   col_lens[3] = a->nz*bs2;
309 
310   /* store lengths of each row and write (including header) to file */
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
315     }
316   }
317   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
318   ierr = PetscFree(col_lens);CHKERRQ(ierr);
319 
320   /* store column indices (zero start index) */
321   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
322   count = 0;
323   for (i=0; i<a->mbs; i++) {
324     for (j=0; j<bs; j++) {
325       for (k=a->i[i]; k<a->i[i+1]; k++) {
326         for (l=0; l<bs; l++) {
327           jj[count++] = bs*a->j[k] + l;
328         }
329       }
330     }
331   }
332   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
333   ierr = PetscFree(jj);CHKERRQ(ierr);
334 
335   /* store nonzero values */
336   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
337   count = 0;
338   for (i=0; i<a->mbs; i++) {
339     for (j=0; j<bs; j++) {
340       for (k=a->i[i]; k<a->i[i+1]; k++) {
341         for (l=0; l<bs; l++) {
342           aa[count++] = a->a[bs2*k + l*bs + j];
343         }
344       }
345     }
346   }
347   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
348   ierr = PetscFree(aa);CHKERRQ(ierr);
349 
350   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
351   if (file) {
352     fprintf(file,"-matload_block_size %d\n",a->bs);
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
359 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
360 {
361   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
362   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
363   char              *name;
364   PetscViewerFormat format;
365 
366   PetscFunctionBegin;
367   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
368   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
369   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
370     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
371   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
372     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
373   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
374     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
375     for (i=0; i<a->mbs; i++) {
376       for (j=0; j<bs; j++) {
377         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
378         for (k=a->i[i]; k<a->i[i+1]; k++) {
379           for (l=0; l<bs; l++) {
380 #if defined(PETSC_USE_COMPLEX)
381             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
383                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
384             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
386                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
387             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
388               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             }
390 #else
391             if (a->a[bs2*k + l*bs + j] != 0.0) {
392               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
393             }
394 #endif
395           }
396         }
397         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
398       }
399     }
400     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
401   } else {
402     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
403     for (i=0; i<a->mbs; i++) {
404       for (j=0; j<bs; j++) {
405         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
406         for (k=a->i[i]; k<a->i[i+1]; k++) {
407           for (l=0; l<bs; l++) {
408 #if defined(PETSC_USE_COMPLEX)
409             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
411                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
412             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
413               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
414                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
415             } else {
416               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
417             }
418 #else
419             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
420 #endif
421           }
422         }
423         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
424       }
425     }
426     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
427   }
428   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
434 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
435 {
436   Mat          A = (Mat) Aa;
437   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
438   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
439   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
440   MatScalar    *aa;
441   MPI_Comm     comm;
442   PetscViewer  viewer;
443 
444   PetscFunctionBegin;
445   /*
446       This is nasty. If this is called from an originally parallel matrix
447    then all processes call this,but only the first has the matrix so the
448    rest should return immediately.
449   */
450   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
452   if (rank) PetscFunctionReturn(0);
453 
454   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
455 
456   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
457   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
458 
459   /* loop over matrix elements drawing boxes */
460   color = PETSC_DRAW_BLUE;
461   for (i=0,row=0; i<mbs; i++,row+=bs) {
462     for (j=a->i[i]; j<a->i[i+1]; j++) {
463       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
464       x_l = a->j[j]*bs; x_r = x_l + 1.0;
465       aa = a->a + j*bs2;
466       for (k=0; k<bs; k++) {
467         for (l=0; l<bs; l++) {
468           if (PetscRealPart(*aa++) >=  0.) continue;
469           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
470         }
471       }
472     }
473   }
474   color = PETSC_DRAW_CYAN;
475   for (i=0,row=0; i<mbs; i++,row+=bs) {
476     for (j=a->i[i]; j<a->i[i+1]; j++) {
477       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
478       x_l = a->j[j]*bs; x_r = x_l + 1.0;
479       aa = a->a + j*bs2;
480       for (k=0; k<bs; k++) {
481         for (l=0; l<bs; l++) {
482           if (PetscRealPart(*aa++) != 0.) continue;
483           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488 
489   color = PETSC_DRAW_RED;
490   for (i=0,row=0; i<mbs; i++,row+=bs) {
491     for (j=a->i[i]; j<a->i[i+1]; j++) {
492       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
493       x_l = a->j[j]*bs; x_r = x_l + 1.0;
494       aa = a->a + j*bs2;
495       for (k=0; k<bs; k++) {
496         for (l=0; l<bs; l++) {
497           if (PetscRealPart(*aa++) <= 0.) continue;
498           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
499         }
500       }
501     }
502   }
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
508 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
509 {
510   int          ierr;
511   PetscReal    xl,yl,xr,yr,w,h;
512   PetscDraw    draw;
513   PetscTruth   isnull;
514 
515   PetscFunctionBegin;
516 
517   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
518   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
519 
520   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
521   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
522   xr += w;    yr += h;  xl = -w;     yl = -h;
523   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
524   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
525   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatView_SeqSBAIJ"
531 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
532 {
533   int        ierr;
534   PetscTruth issocket,isascii,isbinary,isdraw;
535 
536   PetscFunctionBegin;
537   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
538   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
539   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
540   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
541   if (issocket) {
542     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
543   } else if (isascii){
544     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
545   } else if (isbinary) {
546     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
547   } else if (isdraw) {
548     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
549   } else {
550     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
558 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
559 {
560   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
561   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
562   int        *ai = a->i,*ailen = a->ilen;
563   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
564   MatScalar  *ap,*aa = a->a,zero = 0.0;
565 
566   PetscFunctionBegin;
567   for (k=0; k<m; k++) { /* loop over rows */
568     row  = im[k]; brow = row/bs;
569     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
570     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
571     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
572     nrow = ailen[brow];
573     for (l=0; l<n; l++) { /* loop over columns */
574       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
575       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
576       col  = in[l] ;
577       bcol = col/bs;
578       cidx = col%bs;
579       ridx = row%bs;
580       high = nrow;
581       low  = 0; /* assume unsorted */
582       while (high-low > 5) {
583         t = (low+high)/2;
584         if (rp[t] > bcol) high = t;
585         else             low  = t;
586       }
587       for (i=low; i<high; i++) {
588         if (rp[i] > bcol) break;
589         if (rp[i] == bcol) {
590           *v++ = ap[bs2*i+bs*cidx+ridx];
591           goto finished;
592         }
593       }
594       *v++ = zero;
595       finished:;
596     }
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
604 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
605 {
606   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
607   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
608   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
609   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
610   PetscTruth      roworiented=a->roworiented;
611   const MatScalar *value = v;
612   MatScalar       *ap,*aa = a->a,*bap;
613 
614   PetscFunctionBegin;
615   if (roworiented) {
616     stepval = (n-1)*bs;
617   } else {
618     stepval = (m-1)*bs;
619   }
620   for (k=0; k<m; k++) { /* loop over added rows */
621     row  = im[k];
622     if (row < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
625 #endif
626     rp   = aj + ai[row];
627     ap   = aa + bs2*ai[row];
628     rmax = imax[row];
629     nrow = ailen[row];
630     low  = 0;
631     for (l=0; l<n; l++) { /* loop over added columns */
632       if (in[l] < 0) continue;
633       col = in[l];
634 #if defined(PETSC_USE_BOPT_g)
635       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
636 #endif
637       if (col < row) continue; /* ignore lower triangular block */
638       if (roworiented) {
639         value = v + k*(stepval+bs)*bs + l*bs;
640       } else {
641         value = v + l*(stepval+bs)*bs + k*bs;
642       }
643       if (!sorted) low = 0; high = nrow;
644       while (high-low > 7) {
645         t = (low+high)/2;
646         if (rp[t] > col) high = t;
647         else             low  = t;
648       }
649       for (i=low; i<high; i++) {
650         if (rp[i] > col) break;
651         if (rp[i] == col) {
652           bap  = ap +  bs2*i;
653           if (roworiented) {
654             if (is == ADD_VALUES) {
655               for (ii=0; ii<bs; ii++,value+=stepval) {
656                 for (jj=ii; jj<bs2; jj+=bs) {
657                   bap[jj] += *value++;
658                 }
659               }
660             } else {
661               for (ii=0; ii<bs; ii++,value+=stepval) {
662                 for (jj=ii; jj<bs2; jj+=bs) {
663                   bap[jj] = *value++;
664                 }
665               }
666             }
667           } else {
668             if (is == ADD_VALUES) {
669               for (ii=0; ii<bs; ii++,value+=stepval) {
670                 for (jj=0; jj<bs; jj++) {
671                   *bap++ += *value++;
672                 }
673               }
674             } else {
675               for (ii=0; ii<bs; ii++,value+=stepval) {
676                 for (jj=0; jj<bs; jj++) {
677                   *bap++  = *value++;
678                 }
679               }
680             }
681           }
682           goto noinsert2;
683         }
684       }
685       if (nonew == 1) goto noinsert2;
686       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
687       if (nrow >= rmax) {
688         /* there is no extra room in row, therefore enlarge */
689         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
690         MatScalar *new_a;
691 
692         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
693 
694         /* malloc new storage space */
695         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
696 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
697         new_j   = (int*)(new_a + bs2*new_nz);
698         new_i   = new_j + new_nz;
699 
700         /* copy over old data into new slots */
701         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
702         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
703         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
704         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
705         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
706         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
707         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
708         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
709         /* free up old matrix storage */
710         ierr = PetscFree(a->a);CHKERRQ(ierr);
711         if (!a->singlemalloc) {
712           ierr = PetscFree(a->i);CHKERRQ(ierr);
713           ierr = PetscFree(a->j);CHKERRQ(ierr);
714         }
715         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
716         a->singlemalloc = PETSC_TRUE;
717 
718         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
719         rmax = imax[row] = imax[row] + CHUNKSIZE;
720         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
721         a->maxnz += bs2*CHUNKSIZE;
722         a->reallocs++;
723         a->nz++;
724       }
725       N = nrow++ - 1;
726       /* shift up all the later entries in this row */
727       for (ii=N; ii>=i; ii--) {
728         rp[ii+1] = rp[ii];
729         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
730       }
731       if (N >= i) {
732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
733       }
734       rp[i] = col;
735       bap   = ap +  bs2*i;
736       if (roworiented) {
737         for (ii=0; ii<bs; ii++,value+=stepval) {
738           for (jj=ii; jj<bs2; jj+=bs) {
739             bap[jj] = *value++;
740           }
741         }
742       } else {
743         for (ii=0; ii<bs; ii++,value+=stepval) {
744           for (jj=0; jj<bs; jj++) {
745             *bap++  = *value++;
746           }
747         }
748       }
749       noinsert2:;
750       low = i;
751     }
752     ailen[row] = nrow;
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
759 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
760 {
761   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
762   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
763   int        m = A->m,*ip,N,*ailen = a->ilen;
764   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
765   MatScalar  *aa = a->a,*ap;
766 
767   PetscFunctionBegin;
768   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
769 
770   if (m) rmax = ailen[0];
771   for (i=1; i<mbs; i++) {
772     /* move each row back by the amount of empty slots (fshift) before it*/
773     fshift += imax[i-1] - ailen[i-1];
774     rmax   = PetscMax(rmax,ailen[i]);
775     if (fshift) {
776       ip = aj + ai[i]; ap = aa + bs2*ai[i];
777       N = ailen[i];
778       for (j=0; j<N; j++) {
779         ip[j-fshift] = ip[j];
780         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
781       }
782     }
783     ai[i] = ai[i-1] + ailen[i-1];
784   }
785   if (mbs) {
786     fshift += imax[mbs-1] - ailen[mbs-1];
787     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
788   }
789   /* reset ilen and imax for each row */
790   for (i=0; i<mbs; i++) {
791     ailen[i] = imax[i] = ai[i+1] - ai[i];
792   }
793   a->nz = ai[mbs];
794 
795   /* diagonals may have moved, reset it */
796   if (a->diag) {
797     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
798   }
799   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
800            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
801   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
802            a->reallocs);
803   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
804   a->reallocs          = 0;
805   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
806 
807   PetscFunctionReturn(0);
808 }
809 
810 /*
811    This function returns an array of flags which indicate the locations of contiguous
812    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
813    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
814    Assume: sizes should be long enough to hold all the values.
815 */
816 #undef __FUNCT__
817 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
818 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
819 {
820   int        i,j,k,row;
821   PetscTruth flg;
822 
823   PetscFunctionBegin;
824   for (i=0,j=0; i<n; j++) {
825     row = idx[i];
826     if (row%bs!=0) { /* Not the begining of a block */
827       sizes[j] = 1;
828       i++;
829     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
830       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
831       i++;
832     } else { /* Begining of the block, so check if the complete block exists */
833       flg = PETSC_TRUE;
834       for (k=1; k<bs; k++) {
835         if (row+k != idx[i+k]) { /* break in the block */
836           flg = PETSC_FALSE;
837           break;
838         }
839       }
840       if (flg == PETSC_TRUE) { /* No break in the bs */
841         sizes[j] = bs;
842         i+= bs;
843       } else {
844         sizes[j] = 1;
845         i++;
846       }
847     }
848   }
849   *bs_max = j;
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
855 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
856 {
857   PetscFunctionBegin;
858   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
859 }
860 
861 /* Only add/insert a(i,j) with i<=j (blocks).
862    Any a(i,j) with i>j input by user is ingored.
863 */
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
867 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
868 {
869   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
870   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
871   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
872   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
873   int         ridx,cidx,bs2=a->bs2,ierr;
874   MatScalar   *ap,value,*aa=a->a,*bap;
875 
876   PetscFunctionBegin;
877 
878   for (k=0; k<m; k++) { /* loop over added rows */
879     row  = im[k];       /* row number */
880     brow = row/bs;      /* block row number */
881     if (row < 0) continue;
882 #if defined(PETSC_USE_BOPT_g)
883     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
884 #endif
885     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
886     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
887     rmax = imax[brow];  /* maximum space allocated for this row */
888     nrow = ailen[brow]; /* actual length of this row */
889     low  = 0;
890 
891     for (l=0; l<n; l++) { /* loop over added columns */
892       if (in[l] < 0) continue;
893 #if defined(PETSC_USE_BOPT_g)
894       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
895 #endif
896       col = in[l];
897       bcol = col/bs;              /* block col number */
898 
899       if (brow <= bcol){
900         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
901         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
902           /* element value a(k,l) */
903           if (roworiented) {
904             value = v[l + k*n];
905           } else {
906             value = v[k + l*m];
907           }
908 
909           /* move pointer bap to a(k,l) quickly and add/insert value */
910           if (!sorted) low = 0; high = nrow;
911           while (high-low > 7) {
912             t = (low+high)/2;
913             if (rp[t] > bcol) high = t;
914             else              low  = t;
915           }
916           for (i=low; i<high; i++) {
917             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
918             if (rp[i] > bcol) break;
919             if (rp[i] == bcol) {
920               bap  = ap +  bs2*i + bs*cidx + ridx;
921               if (is == ADD_VALUES) *bap += value;
922               else                  *bap  = value;
923               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
924               if (brow == bcol && ridx < cidx){
925                 bap  = ap +  bs2*i + bs*ridx + cidx;
926                 if (is == ADD_VALUES) *bap += value;
927                 else                  *bap  = value;
928               }
929               goto noinsert1;
930             }
931           }
932 
933       if (nonew == 1) goto noinsert1;
934       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
935       if (nrow >= rmax) {
936         /* there is no extra room in row, therefore enlarge */
937         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
938         MatScalar *new_a;
939 
940         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
941 
942         /* Malloc new storage space */
943         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
944         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
945         new_j = (int*)(new_a + bs2*new_nz);
946         new_i = new_j + new_nz;
947 
948         /* copy over old data into new slots */
949         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
950         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
951         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
952         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
953         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
954         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
955         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
956         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
957         /* free up old matrix storage */
958         ierr = PetscFree(a->a);CHKERRQ(ierr);
959         if (!a->singlemalloc) {
960           ierr = PetscFree(a->i);CHKERRQ(ierr);
961           ierr = PetscFree(a->j);CHKERRQ(ierr);
962         }
963         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
964         a->singlemalloc = PETSC_TRUE;
965 
966         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
967         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
968         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
969         a->maxnz += bs2*CHUNKSIZE;
970         a->reallocs++;
971         a->nz++;
972       }
973 
974       N = nrow++ - 1;
975       /* shift up all the later entries in this row */
976       for (ii=N; ii>=i; ii--) {
977         rp[ii+1] = rp[ii];
978         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
979       }
980       if (N>=i) {
981         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
982       }
983       rp[i]                      = bcol;
984       ap[bs2*i + bs*cidx + ridx] = value;
985       noinsert1:;
986       low = i;
987       /* } */
988         }
989       } /* end of if .. if..  */
990     }                     /* end of loop over added columns */
991     ailen[brow] = nrow;
992   }                       /* end of loop over added rows */
993 
994   PetscFunctionReturn(0);
995 }
996 
997 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
998 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
999 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
1000 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1001 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
1002 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1003 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1004 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
1005 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1006 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1007 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1008 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1009 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1010 extern int MatZeroEntries_SeqSBAIJ(Mat);
1011 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1012 
1013 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1028 
1029 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1030 
1031 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1038 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1039 
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1048 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1049 
1050 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1056 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1057 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1058 
1059 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1065 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1066 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1067 
1068 #ifdef HAVE_MatICCFactor
1069 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1072 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1073 {
1074   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1075   Mat         outA;
1076   int         ierr;
1077   PetscTruth  row_identity,col_identity;
1078 
1079   PetscFunctionBegin;
1080   /*
1081   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1082   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1083   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1084   if (!row_identity || !col_identity) {
1085     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1086   }
1087   */
1088 
1089   outA          = inA;
1090   inA->factor   = FACTOR_CHOLESKY;
1091 
1092   if (!a->diag) {
1093     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1094   }
1095   /*
1096       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1097       for ILU(0) factorization with natural ordering
1098   */
1099   switch (a->bs) {
1100   case 1:
1101     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1102     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1103     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1104     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1105   case 2:
1106     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1107     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1108     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1109     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1110     break;
1111   case 3:
1112     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1113     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1114     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1115     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1116     break;
1117   case 4:
1118     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1119     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1120     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1121     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1122     break;
1123   case 5:
1124     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1125     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1126     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1127     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1128     break;
1129   case 6:
1130     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1132     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1133     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1134     break;
1135   case 7:
1136     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1137     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1138     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1139     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1140     break;
1141   default:
1142     a->row        = row;
1143     a->icol       = col;
1144     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1145     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1146 
1147     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1148     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1149     PetscLogObjectParent(inA,a->icol);
1150 
1151     if (!a->solve_work) {
1152       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1153       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1154     }
1155   }
1156 
1157   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 #endif
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1165 int MatPrintHelp_SeqSBAIJ(Mat A)
1166 {
1167   static PetscTruth called = PETSC_FALSE;
1168   MPI_Comm          comm = A->comm;
1169   int               ierr;
1170 
1171   PetscFunctionBegin;
1172   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1173   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1174   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 EXTERN_C_BEGIN
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1181 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1182 {
1183   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1184   int         i,nz,n;
1185 
1186   PetscFunctionBegin;
1187   nz = baij->maxnz;
1188   n  = mat->n;
1189   for (i=0; i<nz; i++) {
1190     baij->j[i] = indices[i];
1191   }
1192   baij->nz = nz;
1193   for (i=0; i<n; i++) {
1194     baij->ilen[i] = baij->imax[i];
1195   }
1196 
1197   PetscFunctionReturn(0);
1198 }
1199 EXTERN_C_END
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1203 /*@
1204     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1205        in the matrix.
1206 
1207   Input Parameters:
1208 +  mat     - the SeqSBAIJ matrix
1209 -  indices - the column indices
1210 
1211   Level: advanced
1212 
1213   Notes:
1214     This can be called if you have precomputed the nonzero structure of the
1215   matrix and want to provide it to the matrix object to improve the performance
1216   of the MatSetValues() operation.
1217 
1218     You MUST have set the correct numbers of nonzeros per row in the call to
1219   MatCreateSeqSBAIJ().
1220 
1221     MUST be called before any calls to MatSetValues()
1222 
1223 .seealso: MatCreateSeqSBAIJ
1224 @*/
1225 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1226 {
1227   int ierr,(*f)(Mat,int *);
1228 
1229   PetscFunctionBegin;
1230   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1231   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1232   if (f) {
1233     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1234   } else {
1235     SETERRQ(1,"Wrong type of matrix to set column indices");
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1242 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1243 {
1244   int        ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1253 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1256   PetscFunctionBegin;
1257   *array = a->a;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1263 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1264 {
1265   PetscFunctionBegin;
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include "petscblaslapack.h"
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1272 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1273 {
1274   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1275   int          ierr,one=1,i,bs=y->bs,bs2,j;
1276 
1277   PetscFunctionBegin;
1278   if (str == SAME_NONZERO_PATTERN) {
1279     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1280   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1281     if (y->xtoy && y->XtoY != X) {
1282       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1283       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1284     }
1285     if (!y->xtoy) { /* get xtoy */
1286       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1287       y->XtoY = X;
1288     }
1289     bs2 = bs*bs;
1290     for (i=0; i<x->nz; i++) {
1291       j = 0;
1292       while (j < bs2){
1293         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1294         j++;
1295       }
1296     }
1297     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1298   } else {
1299     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1306 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1307 {
1308   PetscFunctionBegin;
1309   *flg = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1315 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1316 {
1317   PetscFunctionBegin;
1318   *flg = PETSC_TRUE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1324 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1325 {
1326   PetscFunctionBegin;
1327   *flg = PETSC_FALSE;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* -------------------------------------------------------------------*/
1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1333        MatGetRow_SeqSBAIJ,
1334        MatRestoreRow_SeqSBAIJ,
1335        MatMult_SeqSBAIJ_N,
1336 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1337        MatMultTranspose_SeqSBAIJ,
1338        MatMultTransposeAdd_SeqSBAIJ,
1339        MatSolve_SeqSBAIJ_N,
1340        0,
1341        0,
1342 /*10*/ 0,
1343        0,
1344        MatCholeskyFactor_SeqSBAIJ,
1345        MatRelax_SeqSBAIJ,
1346        MatTranspose_SeqSBAIJ,
1347 /*15*/ MatGetInfo_SeqSBAIJ,
1348        MatEqual_SeqSBAIJ,
1349        MatGetDiagonal_SeqSBAIJ,
1350        MatDiagonalScale_SeqSBAIJ,
1351        MatNorm_SeqSBAIJ,
1352 /*20*/ 0,
1353        MatAssemblyEnd_SeqSBAIJ,
1354        0,
1355        MatSetOption_SeqSBAIJ,
1356        MatZeroEntries_SeqSBAIJ,
1357 /*25*/ MatZeroRows_SeqSBAIJ,
1358        0,
1359        0,
1360        MatCholeskyFactorSymbolic_SeqSBAIJ,
1361        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1362 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1363        0,
1364        MatICCFactorSymbolic_SeqSBAIJ,
1365        MatGetArray_SeqSBAIJ,
1366        MatRestoreArray_SeqSBAIJ,
1367 /*35*/ MatDuplicate_SeqSBAIJ,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*40*/ MatAXPY_SeqSBAIJ,
1373        MatGetSubMatrices_SeqSBAIJ,
1374        MatIncreaseOverlap_SeqSBAIJ,
1375        MatGetValues_SeqSBAIJ,
1376        0,
1377 /*45*/ MatPrintHelp_SeqSBAIJ,
1378        MatScale_SeqSBAIJ,
1379        0,
1380        0,
1381        0,
1382 /*50*/ MatGetBlockSize_SeqSBAIJ,
1383        MatGetRowIJ_SeqSBAIJ,
1384        MatRestoreRowIJ_SeqSBAIJ,
1385        0,
1386        0,
1387 /*55*/ 0,
1388        0,
1389        0,
1390        0,
1391        MatSetValuesBlocked_SeqSBAIJ,
1392 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1393        0,
1394        0,
1395        MatGetPetscMaps_Petsc,
1396        0,
1397 /*65*/ 0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*70*/ MatGetRowMax_SeqSBAIJ,
1403        0,
1404        0,
1405        0,
1406        0,
1407 /*75*/ 0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*80*/ 0,
1413        0,
1414        0,
1415 #if !defined(PETSC_USE_COMPLEX)
1416        MatGetInertia_SeqSBAIJ,
1417 #else
1418        0,
1419 #endif
1420 /*85*/ MatLoad_SeqSBAIJ,
1421        MatIsSymmetric_SeqSBAIJ,
1422        MatIsStructurallySymmetric_SeqSBAIJ,
1423        MatIsHermitian_SeqSBAIJ
1424 };
1425 
1426 EXTERN_C_BEGIN
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1429 int MatStoreValues_SeqSBAIJ(Mat mat)
1430 {
1431   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1432   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1433   int         ierr;
1434 
1435   PetscFunctionBegin;
1436   if (aij->nonew != 1) {
1437     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1438   }
1439 
1440   /* allocate space for values if not already there */
1441   if (!aij->saved_values) {
1442     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1443   }
1444 
1445   /* copy values over */
1446   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 EXTERN_C_END
1450 
1451 EXTERN_C_BEGIN
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1454 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1455 {
1456   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1457   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1458 
1459   PetscFunctionBegin;
1460   if (aij->nonew != 1) {
1461     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1462   }
1463   if (!aij->saved_values) {
1464     SETERRQ(1,"Must call MatStoreValues(A);first");
1465   }
1466 
1467   /* copy values over */
1468   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 EXTERN_C_END
1472 
1473 EXTERN_C_BEGIN
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1476 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1477 {
1478   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1479   int          i,len,ierr,mbs,bs2;
1480   PetscTruth   flg;
1481 
1482   PetscFunctionBegin;
1483   B->preallocated = PETSC_TRUE;
1484   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1485   mbs  = B->m/bs;
1486   bs2  = bs*bs;
1487 
1488   if (mbs*bs != B->m) {
1489     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1490   }
1491 
1492   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1493   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1494   if (nnz) {
1495     for (i=0; i<mbs; i++) {
1496       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1497       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1498     }
1499   }
1500 
1501   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1502   if (!flg) {
1503     switch (bs) {
1504     case 1:
1505       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1506       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1507       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1508       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1509       B->ops->mult            = MatMult_SeqSBAIJ_1;
1510       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1511       break;
1512     case 2:
1513       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1514       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1515       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1516       B->ops->mult            = MatMult_SeqSBAIJ_2;
1517       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1518       break;
1519     case 3:
1520       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1521       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1522       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1523       B->ops->mult            = MatMult_SeqSBAIJ_3;
1524       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1525       break;
1526     case 4:
1527       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1528       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1529       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1530       B->ops->mult            = MatMult_SeqSBAIJ_4;
1531       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1532       break;
1533     case 5:
1534       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1535       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1536       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1537       B->ops->mult            = MatMult_SeqSBAIJ_5;
1538       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1539       break;
1540     case 6:
1541       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1542       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1543       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1544       B->ops->mult            = MatMult_SeqSBAIJ_6;
1545       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1546       break;
1547     case 7:
1548       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1549       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1550       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1551       B->ops->mult            = MatMult_SeqSBAIJ_7;
1552       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1553       break;
1554     }
1555   }
1556 
1557   b->mbs = mbs;
1558   b->nbs = mbs;
1559   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1560   if (!nnz) {
1561     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1562     else if (nz <= 0)        nz = 1;
1563     for (i=0; i<mbs; i++) {
1564       b->imax[i] = nz;
1565     }
1566     nz = nz*mbs; /* total nz */
1567   } else {
1568     nz = 0;
1569     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1570   }
1571   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1572 
1573   /* allocate the matrix space */
1574   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1575   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1576   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1577   b->j = (int*)(b->a + nz*bs2);
1578   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1579   b->i = b->j + nz;
1580   b->singlemalloc = PETSC_TRUE;
1581 
1582   /* pointer to beginning of each row */
1583   b->i[0] = 0;
1584   for (i=1; i<mbs+1; i++) {
1585     b->i[i] = b->i[i-1] + b->imax[i-1];
1586   }
1587 
1588   /* b->ilen will count nonzeros in each block row so far. */
1589   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1590   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1591   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1592 
1593   b->bs               = bs;
1594   b->bs2              = bs2;
1595   b->nz             = 0;
1596   b->maxnz          = nz*bs2;
1597 
1598   b->inew             = 0;
1599   b->jnew             = 0;
1600   b->anew             = 0;
1601   b->a2anew           = 0;
1602   b->permute          = PETSC_FALSE;
1603   PetscFunctionReturn(0);
1604 }
1605 EXTERN_C_END
1606 
1607 /*MC
1608    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1609    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1610 
1611    Options Database Keys:
1612 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1613 
1614   Level: beginner
1615 
1616 .seealso: MatCreateSeqSBAIJ
1617 M*/
1618 
1619 EXTERN_C_BEGIN
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1622 int MatCreate_SeqSBAIJ(Mat B)
1623 {
1624   Mat_SeqSBAIJ *b;
1625   int          ierr,size;
1626 
1627   PetscFunctionBegin;
1628   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1629   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1630   B->m = B->M = PetscMax(B->m,B->M);
1631   B->n = B->N = PetscMax(B->n,B->N);
1632 
1633   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1634   B->data = (void*)b;
1635   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1636   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1637   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1638   B->ops->view        = MatView_SeqSBAIJ;
1639   B->factor           = 0;
1640   B->lupivotthreshold = 1.0;
1641   B->mapping          = 0;
1642   b->row              = 0;
1643   b->icol             = 0;
1644   b->reallocs         = 0;
1645   b->saved_values     = 0;
1646 
1647   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1648   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1649 
1650   b->sorted           = PETSC_FALSE;
1651   b->roworiented      = PETSC_TRUE;
1652   b->nonew            = 0;
1653   b->diag             = 0;
1654   b->solve_work       = 0;
1655   b->mult_work        = 0;
1656   B->spptr            = 0;
1657   b->keepzeroedrows   = PETSC_FALSE;
1658   b->xtoy             = 0;
1659   b->XtoY             = 0;
1660 
1661   b->inew             = 0;
1662   b->jnew             = 0;
1663   b->anew             = 0;
1664   b->a2anew           = 0;
1665   b->permute          = PETSC_FALSE;
1666 
1667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1668                                      "MatStoreValues_SeqSBAIJ",
1669                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1671                                      "MatRetrieveValues_SeqSBAIJ",
1672                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1674                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1675                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1677                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1678                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 EXTERN_C_END
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1685 /*@C
1686    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1687    compressed row) format.  For good matrix assembly performance the
1688    user should preallocate the matrix storage by setting the parameter nz
1689    (or the array nnz).  By setting these parameters accurately, performance
1690    during matrix assembly can be increased by more than a factor of 50.
1691 
1692    Collective on Mat
1693 
1694    Input Parameters:
1695 +  A - the symmetric matrix
1696 .  bs - size of block
1697 .  nz - number of block nonzeros per block row (same for all rows)
1698 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1699          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1700 
1701    Options Database Keys:
1702 .   -mat_no_unroll - uses code that does not unroll the loops in the
1703                      block calculations (much slower)
1704 .    -mat_block_size - size of the blocks to use
1705 
1706    Level: intermediate
1707 
1708    Notes:
1709    Specify the preallocated storage with either nz or nnz (not both).
1710    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1711    allocation.  For additional details, see the users manual chapter on
1712    matrices.
1713 
1714 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1715 @*/
1716 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1717   int ierr,(*f)(Mat,int,int,const int[]);
1718 
1719   PetscFunctionBegin;
1720   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1721   if (f) {
1722     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreateSeqSBAIJ"
1729 /*@C
1730    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1731    compressed row) format.  For good matrix assembly performance the
1732    user should preallocate the matrix storage by setting the parameter nz
1733    (or the array nnz).  By setting these parameters accurately, performance
1734    during matrix assembly can be increased by more than a factor of 50.
1735 
1736    Collective on MPI_Comm
1737 
1738    Input Parameters:
1739 +  comm - MPI communicator, set to PETSC_COMM_SELF
1740 .  bs - size of block
1741 .  m - number of rows, or number of columns
1742 .  nz - number of block nonzeros per block row (same for all rows)
1743 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1744          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1745 
1746    Output Parameter:
1747 .  A - the symmetric matrix
1748 
1749    Options Database Keys:
1750 .   -mat_no_unroll - uses code that does not unroll the loops in the
1751                      block calculations (much slower)
1752 .    -mat_block_size - size of the blocks to use
1753 
1754    Level: intermediate
1755 
1756    Notes:
1757 
1758    Specify the preallocated storage with either nz or nnz (not both).
1759    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1760    allocation.  For additional details, see the users manual chapter on
1761    matrices.
1762 
1763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1764 @*/
1765 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1766 {
1767   int ierr;
1768 
1769   PetscFunctionBegin;
1770   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1771   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1772   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1778 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1779 {
1780   Mat         C;
1781   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1782   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1783 
1784   PetscFunctionBegin;
1785   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1786 
1787   *B = 0;
1788   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1789   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1790   c    = (Mat_SeqSBAIJ*)C->data;
1791 
1792   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1793   C->preallocated   = PETSC_TRUE;
1794   C->factor         = A->factor;
1795   c->row            = 0;
1796   c->icol           = 0;
1797   c->saved_values   = 0;
1798   c->keepzeroedrows = a->keepzeroedrows;
1799   C->assembled      = PETSC_TRUE;
1800 
1801   c->bs         = a->bs;
1802   c->bs2        = a->bs2;
1803   c->mbs        = a->mbs;
1804   c->nbs        = a->nbs;
1805 
1806   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1807   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1808   for (i=0; i<mbs; i++) {
1809     c->imax[i] = a->imax[i];
1810     c->ilen[i] = a->ilen[i];
1811   }
1812 
1813   /* allocate the matrix space */
1814   c->singlemalloc = PETSC_TRUE;
1815   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1816   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1817   c->j = (int*)(c->a + nz*bs2);
1818   c->i = c->j + nz;
1819   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1820   if (mbs > 0) {
1821     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1822     if (cpvalues == MAT_COPY_VALUES) {
1823       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1824     } else {
1825       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1826     }
1827   }
1828 
1829   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1830   c->sorted      = a->sorted;
1831   c->roworiented = a->roworiented;
1832   c->nonew       = a->nonew;
1833 
1834   if (a->diag) {
1835     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1836     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1837     for (i=0; i<mbs; i++) {
1838       c->diag[i] = a->diag[i];
1839     }
1840   } else c->diag        = 0;
1841   c->nz               = a->nz;
1842   c->maxnz            = a->maxnz;
1843   c->solve_work         = 0;
1844   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1845   c->mult_work          = 0;
1846   *B = C;
1847   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1853 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1854 {
1855   Mat_SeqSBAIJ *a;
1856   Mat          B;
1857   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1858   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1859   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1860   int          *masked,nmask,tmp,bs2,ishift;
1861   PetscScalar  *aa;
1862   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1863 
1864   PetscFunctionBegin;
1865   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1866   bs2  = bs*bs;
1867 
1868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1869   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1870   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1871   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1872   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1873   M = header[1]; N = header[2]; nz = header[3];
1874 
1875   if (header[3] < 0) {
1876     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1877   }
1878 
1879   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1880 
1881   /*
1882      This code adds extra rows to make sure the number of rows is
1883     divisible by the blocksize
1884   */
1885   mbs        = M/bs;
1886   extra_rows = bs - M + bs*(mbs);
1887   if (extra_rows == bs) extra_rows = 0;
1888   else                  mbs++;
1889   if (extra_rows) {
1890     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1891   }
1892 
1893   /* read in row lengths */
1894   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1895   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1896   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1897 
1898   /* read in column indices */
1899   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1900   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1901   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1902 
1903   /* loop over row lengths determining block row lengths */
1904   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1905   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1906   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1907   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1908   masked   = mask + mbs;
1909   rowcount = 0; nzcount = 0;
1910   for (i=0; i<mbs; i++) {
1911     nmask = 0;
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[rowcount];
1914       for (k=0; k<kmax; k++) {
1915         tmp = jj[nzcount++]/bs;   /* block col. index */
1916         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1917       }
1918       rowcount++;
1919     }
1920     s_browlengths[i] += nmask;
1921 
1922     /* zero out the mask elements we set */
1923     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1924   }
1925 
1926   /* create our matrix */
1927   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1928   ierr = MatSetType(B,type);CHKERRQ(ierr);
1929   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1930   a = (Mat_SeqSBAIJ*)B->data;
1931 
1932   /* set matrix "i" values */
1933   a->i[0] = 0;
1934   for (i=1; i<= mbs; i++) {
1935     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1936     a->ilen[i-1] = s_browlengths[i-1];
1937   }
1938   a->nz = a->i[mbs];
1939 
1940   /* read in nonzero values */
1941   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1942   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1943   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1944 
1945   /* set "a" and "j" values into matrix */
1946   nzcount = 0; jcount = 0;
1947   for (i=0; i<mbs; i++) {
1948     nzcountb = nzcount;
1949     nmask    = 0;
1950     for (j=0; j<bs; j++) {
1951       kmax = rowlengths[i*bs+j];
1952       for (k=0; k<kmax; k++) {
1953         tmp = jj[nzcount++]/bs; /* block col. index */
1954         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1955       }
1956     }
1957     /* sort the masked values */
1958     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1959 
1960     /* set "j" values into matrix */
1961     maskcount = 1;
1962     for (j=0; j<nmask; j++) {
1963       a->j[jcount++]  = masked[j];
1964       mask[masked[j]] = maskcount++;
1965     }
1966 
1967     /* set "a" values into matrix */
1968     ishift = bs2*a->i[i];
1969     for (j=0; j<bs; j++) {
1970       kmax = rowlengths[i*bs+j];
1971       for (k=0; k<kmax; k++) {
1972         tmp       = jj[nzcountb]/bs ; /* block col. index */
1973         if (tmp >= i){
1974           block     = mask[tmp] - 1;
1975           point     = jj[nzcountb] - bs*tmp;
1976           idx       = ishift + bs2*block + j + bs*point;
1977           a->a[idx] = aa[nzcountb];
1978         }
1979         nzcountb++;
1980       }
1981     }
1982     /* zero out the mask elements we set */
1983     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1984   }
1985   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1986 
1987   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1988   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1989   ierr = PetscFree(aa);CHKERRQ(ierr);
1990   ierr = PetscFree(jj);CHKERRQ(ierr);
1991   ierr = PetscFree(mask);CHKERRQ(ierr);
1992 
1993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   ierr = MatView_Private(B);CHKERRQ(ierr);
1996   *A = B;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2002 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2003 {
2004   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2005   MatScalar    *aa=a->a,*v,*v1;
2006   PetscScalar  *x,*b,*t,sum,d;
2007   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
2008   int          nz,nz1,*vj,*vj1,i;
2009 
2010   PetscFunctionBegin;
2011   its = its*lits;
2012   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2013 
2014   if (bs > 1)
2015     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2016 
2017   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2018   if (xx != bb) {
2019     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2020   } else {
2021     b = x;
2022   }
2023 
2024   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2025 
2026   if (flag & SOR_ZERO_INITIAL_GUESS) {
2027     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2028       for (i=0; i<m; i++)
2029         t[i] = b[i];
2030 
2031       for (i=0; i<m; i++){
2032         d  = *(aa + ai[i]);  /* diag[i] */
2033         v  = aa + ai[i] + 1;
2034         vj = aj + ai[i] + 1;
2035         nz = ai[i+1] - ai[i] - 1;
2036         x[i] = omega*t[i]/d;
2037         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2038         PetscLogFlops(2*nz-1);
2039       }
2040     }
2041 
2042     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2043       for (i=0; i<m; i++)
2044         t[i] = b[i];
2045 
2046       for (i=0; i<m-1; i++){  /* update rhs */
2047         v  = aa + ai[i] + 1;
2048         vj = aj + ai[i] + 1;
2049         nz = ai[i+1] - ai[i] - 1;
2050         while (nz--) t[*vj++] -= x[i]*(*v++);
2051         PetscLogFlops(2*nz-1);
2052       }
2053       for (i=m-1; i>=0; i--){
2054         d  = *(aa + ai[i]);
2055         v  = aa + ai[i] + 1;
2056         vj = aj + ai[i] + 1;
2057         nz = ai[i+1] - ai[i] - 1;
2058         sum = t[i];
2059         while (nz--) sum -= x[*vj++]*(*v++);
2060         PetscLogFlops(2*nz-1);
2061         x[i] =   (1-omega)*x[i] + omega*sum/d;
2062       }
2063     }
2064     its--;
2065   }
2066 
2067   while (its--) {
2068     /*
2069        forward sweep:
2070        for i=0,...,m-1:
2071          sum[i] = (b[i] - U(i,:)x )/d[i];
2072          x[i]   = (1-omega)x[i] + omega*sum[i];
2073          b      = b - x[i]*U^T(i,:);
2074 
2075     */
2076     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2077       for (i=0; i<m; i++)
2078         t[i] = b[i];
2079 
2080       for (i=0; i<m; i++){
2081         d  = *(aa + ai[i]);  /* diag[i] */
2082         v  = aa + ai[i] + 1; v1=v;
2083         vj = aj + ai[i] + 1; vj1=vj;
2084         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2085         sum = t[i];
2086         while (nz1--) sum -= (*v1++)*x[*vj1++];
2087         x[i] = (1-omega)*x[i] + omega*sum/d;
2088         while (nz--) t[*vj++] -= x[i]*(*v++);
2089         PetscLogFlops(4*nz-2);
2090       }
2091     }
2092 
2093   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2094       /*
2095        backward sweep:
2096        b = b - x[i]*U^T(i,:), i=0,...,n-2
2097        for i=m-1,...,0:
2098          sum[i] = (b[i] - U(i,:)x )/d[i];
2099          x[i]   = (1-omega)x[i] + omega*sum[i];
2100       */
2101       for (i=0; i<m; i++)
2102         t[i] = b[i];
2103 
2104       for (i=0; i<m-1; i++){  /* update rhs */
2105         v  = aa + ai[i] + 1;
2106         vj = aj + ai[i] + 1;
2107         nz = ai[i+1] - ai[i] - 1;
2108         while (nz--) t[*vj++] -= x[i]*(*v++);
2109         PetscLogFlops(2*nz-1);
2110       }
2111       for (i=m-1; i>=0; i--){
2112         d  = *(aa + ai[i]);
2113         v  = aa + ai[i] + 1;
2114         vj = aj + ai[i] + 1;
2115         nz = ai[i+1] - ai[i] - 1;
2116         sum = t[i];
2117         while (nz--) sum -= x[*vj++]*(*v++);
2118         PetscLogFlops(2*nz-1);
2119         x[i] =   (1-omega)*x[i] + omega*sum/d;
2120       }
2121     }
2122   }
2123 
2124   ierr = PetscFree(t); CHKERRQ(ierr);
2125   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2126   if (bb != xx) {
2127     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2128   }
2129 
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 
2134 
2135 
2136 
2137 
2138