xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision e98b92d7eaea959d0b5b2539bdf884f74f677bdd)
1 /*$Id: sbaij.c,v 1.57 2001/06/16 02:48:46 bsmith Exp buschelm $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                            /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 #include "src/mat/impls/sbaij/seq/sbaij.h"
12 
13 #define CHUNKSIZE  10
14 
15 /*
16      Checks for missing diagonals
17 */
18 #undef __FUNCT__
19 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
20 int MatMissingDiagonal_SeqSBAIJ(Mat A)
21 {
22   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23   int         *diag,*jj = a->j,i,ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
27   diag = a->diag;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
31     }
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,j,*diag,m = a->mbs,ierr;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
47   PetscLogObjectMemory(A,(m+1)*sizeof(int));
48   for (i=0; i<m; i++) {
49     diag[i] = a->i[i+1];
50     for (j=a->i[i]; j<a->i[i+1]; j++) {
51       if (a->j[j] == i) {
52         diag[i] = j;
53         break;
54       }
55     }
56   }
57   a->diag = diag;
58   PetscFunctionReturn(0);
59 }
60 
61 
62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69 
70   PetscFunctionBegin;
71   if (ia) {
72     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
73   }
74   *nn = a->mbs;
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
81 {
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107 #if defined(PETSC_USE_LOG)
108   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
109 #endif
110   ierr = PetscFree(a->a);CHKERRQ(ierr);
111   if (!a->singlemalloc) {
112     ierr = PetscFree(a->i);CHKERRQ(ierr);
113     ierr = PetscFree(a->j);CHKERRQ(ierr);
114   }
115   if (a->row) {
116     ierr = ISDestroy(a->row);CHKERRQ(ierr);
117   }
118   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
119   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
120   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
121   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
122   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
125 
126   if (a->inew){
127     ierr = PetscFree(a->inew);CHKERRQ(ierr);
128     a->inew = 0;
129   }
130   ierr = PetscFree(a);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
137 {
138   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139 
140   PetscFunctionBegin;
141   switch (op) {
142   case MAT_ROW_ORIENTED:
143     a->roworiented    = PETSC_TRUE;
144     break;
145   case MAT_COLUMN_ORIENTED:
146     a->roworiented    = PETSC_FALSE;
147     break;
148   case MAT_COLUMNS_SORTED:
149     a->sorted         = PETSC_TRUE;
150     break;
151   case MAT_COLUMNS_UNSORTED:
152     a->sorted         = PETSC_FALSE;
153     break;
154   case MAT_KEEP_ZEROED_ROWS:
155     a->keepzeroedrows = PETSC_TRUE;
156     break;
157   case MAT_NO_NEW_NONZERO_LOCATIONS:
158     a->nonew          = 1;
159     break;
160   case MAT_NEW_NONZERO_LOCATION_ERR:
161     a->nonew          = -1;
162     break;
163   case MAT_NEW_NONZERO_ALLOCATION_ERR:
164     a->nonew          = -2;
165     break;
166   case MAT_YES_NEW_NONZERO_LOCATIONS:
167     a->nonew          = 0;
168     break;
169   case MAT_ROWS_SORTED:
170   case MAT_ROWS_UNSORTED:
171   case MAT_YES_NEW_DIAGONALS:
172   case MAT_IGNORE_OFF_PROC_ENTRIES:
173   case MAT_USE_HASH_TABLE:
174     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
175     break;
176   case MAT_NO_NEW_DIAGONALS:
177     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
178     break;
179   default:
180     SETERRQ(PETSC_ERR_SUP,"unknown option");
181     break;
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatGetOwnershipRange_SeqSBAIJ"
188 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n)
189 {
190   PetscFunctionBegin;
191   *m = 0;
192   *n = A->m;
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
198 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v)
199 {
200   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
201   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
202   MatScalar    *aa,*aa_i;
203   Scalar       *v_i;
204 
205   PetscFunctionBegin;
206   bs  = a->bs;
207   ai  = a->i;
208   aj  = a->j;
209   aa  = a->a;
210   bs2 = a->bs2;
211 
212   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
213 
214   bn  = row/bs;   /* Block number */
215   bp  = row % bs; /* Block position */
216   M   = ai[bn+1] - ai[bn];
217   *ncols = bs*M;
218 
219   if (v) {
220     *v = 0;
221     if (*ncols) {
222       ierr = PetscMalloc((*ncols+row)*sizeof(Scalar),v);CHKERRQ(ierr);
223       for (i=0; i<M; i++) { /* for each block in the block row */
224         v_i  = *v + i*bs;
225         aa_i = aa + bs2*(ai[bn] + i);
226         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
227       }
228     }
229   }
230 
231   if (cols) {
232     *cols = 0;
233     if (*ncols) {
234       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
235       for (i=0; i<M; i++) { /* for each block in the block row */
236         cols_i = *cols + i*bs;
237         itmp  = bs*aj[ai[bn] + i];
238         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
239       }
240     }
241   }
242 
243   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
244   /* this segment is currently removed, so only entries in the upper triangle are obtained */
245 #ifdef column_search
246   v_i    = *v    + M*bs;
247   cols_i = *cols + M*bs;
248   for (i=0; i<bn; i++){ /* for each block row */
249     M = ai[i+1] - ai[i];
250     for (j=0; j<M; j++){
251       itmp = aj[ai[i] + j];    /* block column value */
252       if (itmp == bn){
253         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
254         for (k=0; k<bs; k++) {
255           *cols_i++ = i*bs+k;
256           *v_i++    = aa_i[k];
257         }
258         *ncols += bs;
259         break;
260       }
261     }
262   }
263 #endif
264 
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
270 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
271 {
272   int ierr;
273 
274   PetscFunctionBegin;
275   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
276   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
282 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
283 {
284   PetscFunctionBegin;
285   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
286   /* PetscFunctionReturn(0); */
287 }
288 
289 #undef __FUNCT__
290 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
291 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
292 {
293   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
294   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
295   Scalar       *aa;
296   FILE         *file;
297 
298   PetscFunctionBegin;
299   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
300   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
301   col_lens[0] = MAT_COOKIE;
302 
303   col_lens[1] = A->m;
304   col_lens[2] = A->m;
305   col_lens[3] = a->s_nz*bs2;
306 
307   /* store lengths of each row and write (including header) to file */
308   count = 0;
309   for (i=0; i<a->mbs; i++) {
310     for (j=0; j<bs; j++) {
311       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
312     }
313   }
314   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
315   ierr = PetscFree(col_lens);CHKERRQ(ierr);
316 
317   /* store column indices (zero start index) */
318   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
319   count = 0;
320   for (i=0; i<a->mbs; i++) {
321     for (j=0; j<bs; j++) {
322       for (k=a->i[i]; k<a->i[i+1]; k++) {
323         for (l=0; l<bs; l++) {
324           jj[count++] = bs*a->j[k] + l;
325         }
326       }
327     }
328   }
329   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
330   ierr = PetscFree(jj);CHKERRQ(ierr);
331 
332   /* store nonzero values */
333   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr);
334   count = 0;
335   for (i=0; i<a->mbs; i++) {
336     for (j=0; j<bs; j++) {
337       for (k=a->i[i]; k<a->i[i+1]; k++) {
338         for (l=0; l<bs; l++) {
339           aa[count++] = a->a[bs2*k + l*bs + j];
340         }
341       }
342     }
343   }
344   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
345   ierr = PetscFree(aa);CHKERRQ(ierr);
346 
347   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
348   if (file) {
349     fprintf(file,"-matload_block_size %d\n",a->bs);
350   }
351   PetscFunctionReturn(0);
352 }
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
356 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
357 {
358   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
359   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
360   char              *name;
361   PetscViewerFormat format;
362 
363   PetscFunctionBegin;
364   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
365   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
366   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
367     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
368   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
369     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
370   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
371     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
372     for (i=0; i<a->mbs; i++) {
373       for (j=0; j<bs; j++) {
374         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
375         for (k=a->i[i]; k<a->i[i+1]; k++) {
376           for (l=0; l<bs; l++) {
377 #if defined(PETSC_USE_COMPLEX)
378             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
379               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
380                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
381             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
383                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
384             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
386             }
387 #else
388             if (a->a[bs2*k + l*bs + j] != 0.0) {
389               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
390             }
391 #endif
392           }
393         }
394         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
395       }
396     }
397     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
398   } else {
399     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
400     for (i=0; i<a->mbs; i++) {
401       for (j=0; j<bs; j++) {
402         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
403         for (k=a->i[i]; k<a->i[i+1]; k++) {
404           for (l=0; l<bs; l++) {
405 #if defined(PETSC_USE_COMPLEX)
406             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
407               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
408                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
409             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
410               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
411                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
412             } else {
413               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
414             }
415 #else
416             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
417 #endif
418           }
419         }
420         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
421       }
422     }
423     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
424   }
425   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
431 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
432 {
433   Mat          A = (Mat) Aa;
434   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
435   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
436   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
437   MatScalar    *aa;
438   MPI_Comm     comm;
439   PetscViewer  viewer;
440 
441   PetscFunctionBegin;
442   /*
443       This is nasty. If this is called from an originally parallel matrix
444    then all processes call this,but only the first has the matrix so the
445    rest should return immediately.
446   */
447   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
448   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
449   if (rank) PetscFunctionReturn(0);
450 
451   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
452 
453   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
454   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
455 
456   /* loop over matrix elements drawing boxes */
457   color = PETSC_DRAW_BLUE;
458   for (i=0,row=0; i<mbs; i++,row+=bs) {
459     for (j=a->i[i]; j<a->i[i+1]; j++) {
460       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
461       x_l = a->j[j]*bs; x_r = x_l + 1.0;
462       aa = a->a + j*bs2;
463       for (k=0; k<bs; k++) {
464         for (l=0; l<bs; l++) {
465           if (PetscRealPart(*aa++) >=  0.) continue;
466           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
467         }
468       }
469     }
470   }
471   color = PETSC_DRAW_CYAN;
472   for (i=0,row=0; i<mbs; i++,row+=bs) {
473     for (j=a->i[i]; j<a->i[i+1]; j++) {
474       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
475       x_l = a->j[j]*bs; x_r = x_l + 1.0;
476       aa = a->a + j*bs2;
477       for (k=0; k<bs; k++) {
478         for (l=0; l<bs; l++) {
479           if (PetscRealPart(*aa++) != 0.) continue;
480           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
481         }
482       }
483     }
484   }
485 
486   color = PETSC_DRAW_RED;
487   for (i=0,row=0; i<mbs; i++,row+=bs) {
488     for (j=a->i[i]; j<a->i[i+1]; j++) {
489       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
490       x_l = a->j[j]*bs; x_r = x_l + 1.0;
491       aa = a->a + j*bs2;
492       for (k=0; k<bs; k++) {
493         for (l=0; l<bs; l++) {
494           if (PetscRealPart(*aa++) <= 0.) continue;
495           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
496         }
497       }
498     }
499   }
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
505 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
506 {
507   int          ierr;
508   PetscReal    xl,yl,xr,yr,w,h;
509   PetscDraw    draw;
510   PetscTruth   isnull;
511 
512   PetscFunctionBegin;
513 
514   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
515   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
516 
517   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
518   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
519   xr += w;    yr += h;  xl = -w;     yl = -h;
520   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
521   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
522   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatView_SeqSBAIJ"
528 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
529 {
530   int        ierr;
531   PetscTruth issocket,isascii,isbinary,isdraw;
532 
533   PetscFunctionBegin;
534   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
535   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
536   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
537   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
538   if (issocket) {
539     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
540   } else if (isascii){
541     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
542   } else if (isbinary) {
543     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
544   } else if (isdraw) {
545     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
546   } else {
547     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
555 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
556 {
557   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
558   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
559   int        *ai = a->i,*ailen = a->ilen;
560   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
561   MatScalar  *ap,*aa = a->a,zero = 0.0;
562 
563   PetscFunctionBegin;
564   for (k=0; k<m; k++) { /* loop over rows */
565     row  = im[k]; brow = row/bs;
566     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
567     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
568     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
569     nrow = ailen[brow];
570     for (l=0; l<n; l++) { /* loop over columns */
571       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
572       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
573       col  = in[l] ;
574       bcol = col/bs;
575       cidx = col%bs;
576       ridx = row%bs;
577       high = nrow;
578       low  = 0; /* assume unsorted */
579       while (high-low > 5) {
580         t = (low+high)/2;
581         if (rp[t] > bcol) high = t;
582         else             low  = t;
583       }
584       for (i=low; i<high; i++) {
585         if (rp[i] > bcol) break;
586         if (rp[i] == bcol) {
587           *v++ = ap[bs2*i+bs*cidx+ridx];
588           goto finished;
589         }
590       }
591       *v++ = zero;
592       finished:;
593     }
594   }
595   PetscFunctionReturn(0);
596 }
597 
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
601 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
602 {
603   PetscFunctionBegin;
604   SETERRQ(1,"Function not yet written for SBAIJ format");
605   /* PetscFunctionReturn(0); */
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
610 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
611 {
612   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
613   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
614   int        m = A->m,*ip,N,*ailen = a->ilen;
615   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
616   MatScalar  *aa = a->a,*ap;
617 
618   PetscFunctionBegin;
619   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
620 
621   if (m) rmax = ailen[0];
622   for (i=1; i<mbs; i++) {
623     /* move each row back by the amount of empty slots (fshift) before it*/
624     fshift += imax[i-1] - ailen[i-1];
625     rmax   = PetscMax(rmax,ailen[i]);
626     if (fshift) {
627       ip = aj + ai[i]; ap = aa + bs2*ai[i];
628       N = ailen[i];
629       for (j=0; j<N; j++) {
630         ip[j-fshift] = ip[j];
631         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
632       }
633     }
634     ai[i] = ai[i-1] + ailen[i-1];
635   }
636   if (mbs) {
637     fshift += imax[mbs-1] - ailen[mbs-1];
638     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
639   }
640   /* reset ilen and imax for each row */
641   for (i=0; i<mbs; i++) {
642     ailen[i] = imax[i] = ai[i+1] - ai[i];
643   }
644   a->s_nz = ai[mbs];
645 
646   /* diagonals may have moved, so kill the diagonal pointers */
647   if (fshift && a->diag) {
648     ierr = PetscFree(a->diag);CHKERRQ(ierr);
649     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
650     a->diag = 0;
651   }
652   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
653            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
654   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
655            a->reallocs);
656   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
657   a->reallocs          = 0;
658   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
659 
660   PetscFunctionReturn(0);
661 }
662 
663 /*
664    This function returns an array of flags which indicate the locations of contiguous
665    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
666    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
667    Assume: sizes should be long enough to hold all the values.
668 */
669 #undef __FUNCT__
670 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
671 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
672 {
673   int        i,j,k,row;
674   PetscTruth flg;
675 
676   PetscFunctionBegin;
677   for (i=0,j=0; i<n; j++) {
678     row = idx[i];
679     if (row%bs!=0) { /* Not the begining of a block */
680       sizes[j] = 1;
681       i++;
682     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
683       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
684       i++;
685     } else { /* Begining of the block, so check if the complete block exists */
686       flg = PETSC_TRUE;
687       for (k=1; k<bs; k++) {
688         if (row+k != idx[i+k]) { /* break in the block */
689           flg = PETSC_FALSE;
690           break;
691         }
692       }
693       if (flg == PETSC_TRUE) { /* No break in the bs */
694         sizes[j] = bs;
695         i+= bs;
696       } else {
697         sizes[j] = 1;
698         i++;
699       }
700     }
701   }
702   *bs_max = j;
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
708 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag)
709 {
710   Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
711   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
712   int         bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
713   Scalar      zero = 0.0;
714   MatScalar   *aa;
715 
716   PetscFunctionBegin;
717   /* Make a copy of the IS and  sort it */
718   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
719   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
720 
721   /* allocate memory for rows,sizes */
722   ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
723   sizes = rows + is_n;
724 
725   /* initialize copy IS values to rows, and sort them */
726   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
727   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
728   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
729     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
730     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
731   } else {
732     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
733   }
734   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
735 
736   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
737     row   = rows[j];                  /* row to be zeroed */
738     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
739     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
740     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
741     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
742       if (diag) {
743         if (sbaij->ilen[row/bs] > 0) {
744           sbaij->ilen[row/bs] = 1;
745           sbaij->j[sbaij->i[row/bs]] = row/bs;
746           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
747         }
748         /* Now insert all the diagoanl values for this bs */
749         for (k=0; k<bs; k++) {
750           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
751         }
752       } else { /* (!diag) */
753         sbaij->ilen[row/bs] = 0;
754       } /* end (!diag) */
755     } else { /* (sizes[i] != bs), broken block */
756 #if defined (PETSC_USE_DEBUG)
757       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
758 #endif
759       for (k=0; k<count; k++) {
760         aa[0] = zero;
761         aa+=bs;
762       }
763       if (diag) {
764         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
765       }
766     }
767   }
768 
769   ierr = PetscFree(rows);CHKERRQ(ierr);
770   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
771   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 /* Only add/insert a(i,j) with i<=j (blocks).
776    Any a(i,j) with i>j input by user is ingored.
777 */
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
781 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
782 {
783   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
784   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
785   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
786   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
787   int         ridx,cidx,bs2=a->bs2,ierr;
788   MatScalar   *ap,value,*aa=a->a,*bap;
789 
790   PetscFunctionBegin;
791 
792   for (k=0; k<m; k++) { /* loop over added rows */
793     row  = im[k];       /* row number */
794     brow = row/bs;      /* block row number */
795     if (row < 0) continue;
796 #if defined(PETSC_USE_BOPT_g)
797     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
798 #endif
799     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
800     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
801     rmax = imax[brow];  /* maximum space allocated for this row */
802     nrow = ailen[brow]; /* actual length of this row */
803     low  = 0;
804 
805     for (l=0; l<n; l++) { /* loop over added columns */
806       if (in[l] < 0) continue;
807 #if defined(PETSC_USE_BOPT_g)
808       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
809 #endif
810       col = in[l];
811       bcol = col/bs;              /* block col number */
812 
813       if (brow <= bcol){
814         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
815         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
816           /* element value a(k,l) */
817           if (roworiented) {
818             value = v[l + k*n];
819           } else {
820             value = v[k + l*m];
821           }
822 
823           /* move pointer bap to a(k,l) quickly and add/insert value */
824           if (!sorted) low = 0; high = nrow;
825           while (high-low > 7) {
826             t = (low+high)/2;
827             if (rp[t] > bcol) high = t;
828             else              low  = t;
829           }
830           for (i=low; i<high; i++) {
831             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
832             if (rp[i] > bcol) break;
833             if (rp[i] == bcol) {
834               bap  = ap +  bs2*i + bs*cidx + ridx;
835               if (is == ADD_VALUES) *bap += value;
836               else                  *bap  = value;
837               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
838               if (brow == bcol && ridx < cidx){
839                 bap  = ap +  bs2*i + bs*ridx + cidx;
840                 if (is == ADD_VALUES) *bap += value;
841                 else                  *bap  = value;
842               }
843               goto noinsert1;
844             }
845           }
846 
847       if (nonew == 1) goto noinsert1;
848       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
849       if (nrow >= rmax) {
850         /* there is no extra room in row, therefore enlarge */
851         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
852         MatScalar *new_a;
853 
854         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
855 
856         /* Malloc new storage space */
857         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
858         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
859         new_j = (int*)(new_a + bs2*new_nz);
860         new_i = new_j + new_nz;
861 
862         /* copy over old data into new slots */
863         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
864         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
865         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
866         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
867         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
868         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
869         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
870         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
871         /* free up old matrix storage */
872         ierr = PetscFree(a->a);CHKERRQ(ierr);
873         if (!a->singlemalloc) {
874           ierr = PetscFree(a->i);CHKERRQ(ierr);
875           ierr = PetscFree(a->j);CHKERRQ(ierr);
876         }
877         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
878         a->singlemalloc = PETSC_TRUE;
879 
880         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
881         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
882         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
883         a->s_maxnz += bs2*CHUNKSIZE;
884         a->reallocs++;
885         a->s_nz++;
886       }
887 
888       N = nrow++ - 1;
889       /* shift up all the later entries in this row */
890       for (ii=N; ii>=i; ii--) {
891         rp[ii+1] = rp[ii];
892         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
893       }
894       if (N>=i) {
895         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
896       }
897       rp[i]                      = bcol;
898       ap[bs2*i + bs*cidx + ridx] = value;
899       noinsert1:;
900       low = i;
901       /* } */
902         }
903       } /* end of if .. if..  */
904     }                     /* end of loop over added columns */
905     ailen[brow] = nrow;
906   }                       /* end of loop over added rows */
907 
908   PetscFunctionReturn(0);
909 }
910 
911 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
912 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
913 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
914 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
915 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
916 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
917 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
918 extern int MatScale_SeqSBAIJ(Scalar*,Mat);
919 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
920 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
921 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
922 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
923 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
924 extern int MatZeroEntries_SeqSBAIJ(Mat);
925 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
926 
927 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
928 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
929 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
931 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
932 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
933 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
934 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
935 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
936 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
937 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
938 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
939 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
940 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
941 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
942 
943 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
944 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
945 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
951 
952 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
953 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
954 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
955 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
956 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
957 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
958 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
959 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
960 
961 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
962 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
963 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
964 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
965 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
966 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
967 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
968 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
969 
970 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
971 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
972 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
973 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
974 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
975 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
976 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
977 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
978 
979 #ifdef HAVE_MatICCFactor
980 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
981 #undef __FUNCT__
982 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
983 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
984 {
985   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
986   Mat         outA;
987   int         ierr;
988   PetscTruth  row_identity,col_identity;
989 
990   PetscFunctionBegin;
991   /*
992   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
993   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
994   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
995   if (!row_identity || !col_identity) {
996     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
997   }
998   */
999 
1000   outA          = inA;
1001   inA->factor   = FACTOR_CHOLESKY;
1002 
1003   if (!a->diag) {
1004     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1005   }
1006   /*
1007       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1008       for ILU(0) factorization with natural ordering
1009   */
1010   switch (a->bs) {
1011   case 1:
1012     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1013     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1014     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1015   case 2:
1016     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1017     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1018     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1019     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1020     break;
1021   case 3:
1022     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1023     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1024     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1025     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1026     break;
1027   case 4:
1028     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1029     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1030     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1031     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1032     break;
1033   case 5:
1034     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1035     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1036     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1037     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1038     break;
1039   case 6:
1040     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1041     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1042     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1043     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1044     break;
1045   case 7:
1046     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1047     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1048     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1049     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1050     break;
1051   default:
1052     a->row        = row;
1053     a->icol       = col;
1054     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1055     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1056 
1057     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1058     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1059     PetscLogObjectParent(inA,a->icol);
1060 
1061     if (!a->solve_work) {
1062       ierr = PetscMalloc((A->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr);
1063       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar));
1064     }
1065   }
1066 
1067   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1068 
1069   PetscFunctionReturn(0);
1070 }
1071 #endif
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1075 int MatPrintHelp_SeqSBAIJ(Mat A)
1076 {
1077   static PetscTruth called = PETSC_FALSE;
1078   MPI_Comm          comm = A->comm;
1079   int               ierr;
1080 
1081   PetscFunctionBegin;
1082   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1083   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1084   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 EXTERN_C_BEGIN
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1091 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1092 {
1093   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1094   int         i,nz,n;
1095 
1096   PetscFunctionBegin;
1097   nz = baij->s_maxnz;
1098   n  = mat->n;
1099   for (i=0; i<nz; i++) {
1100     baij->j[i] = indices[i];
1101   }
1102   baij->s_nz = nz;
1103   for (i=0; i<n; i++) {
1104     baij->ilen[i] = baij->imax[i];
1105   }
1106 
1107   PetscFunctionReturn(0);
1108 }
1109 EXTERN_C_END
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1113 /*@
1114     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1115        in the matrix.
1116 
1117   Input Parameters:
1118 +  mat     - the SeqSBAIJ matrix
1119 -  indices - the column indices
1120 
1121   Level: advanced
1122 
1123   Notes:
1124     This can be called if you have precomputed the nonzero structure of the
1125   matrix and want to provide it to the matrix object to improve the performance
1126   of the MatSetValues() operation.
1127 
1128     You MUST have set the correct numbers of nonzeros per row in the call to
1129   MatCreateSeqSBAIJ().
1130 
1131     MUST be called before any calls to MatSetValues()
1132 
1133 .seealso: MatCreateSeqSBAIJ
1134 @*/
1135 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1136 {
1137   int ierr,(*f)(Mat,int *);
1138 
1139   PetscFunctionBegin;
1140   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1141   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1142   if (f) {
1143     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1144   } else {
1145     SETERRQ(1,"Wrong type of matrix to set column indices");
1146   }
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1152 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1153 {
1154   int        ierr;
1155 
1156   PetscFunctionBegin;
1157   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /* -------------------------------------------------------------------*/
1162 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1163        MatGetRow_SeqSBAIJ,
1164        MatRestoreRow_SeqSBAIJ,
1165        MatMult_SeqSBAIJ_N,
1166        MatMultAdd_SeqSBAIJ_N,
1167        MatMultTranspose_SeqSBAIJ,
1168        MatMultTransposeAdd_SeqSBAIJ,
1169        MatSolve_SeqSBAIJ_N,
1170        0,
1171        0,
1172        0,
1173        0,
1174        MatCholeskyFactor_SeqSBAIJ,
1175        0,
1176        MatTranspose_SeqSBAIJ,
1177        MatGetInfo_SeqSBAIJ,
1178        MatEqual_SeqSBAIJ,
1179        MatGetDiagonal_SeqSBAIJ,
1180        MatDiagonalScale_SeqSBAIJ,
1181        MatNorm_SeqSBAIJ,
1182        0,
1183        MatAssemblyEnd_SeqSBAIJ,
1184        0,
1185        MatSetOption_SeqSBAIJ,
1186        MatZeroEntries_SeqSBAIJ,
1187        MatZeroRows_SeqSBAIJ,
1188        0,
1189        0,
1190        MatCholeskyFactorSymbolic_SeqSBAIJ,
1191        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1192        MatSetUpPreallocation_SeqSBAIJ,
1193        0,
1194        MatGetOwnershipRange_SeqSBAIJ,
1195        0,
1196        MatICCFactorSymbolic_SeqSBAIJ,
1197        0,
1198        0,
1199        MatDuplicate_SeqSBAIJ,
1200        0,
1201        0,
1202        0,
1203        0,
1204        0,
1205        MatGetSubMatrices_SeqSBAIJ,
1206        MatIncreaseOverlap_SeqSBAIJ,
1207        MatGetValues_SeqSBAIJ,
1208        0,
1209        MatPrintHelp_SeqSBAIJ,
1210        MatScale_SeqSBAIJ,
1211        0,
1212        0,
1213        0,
1214        MatGetBlockSize_SeqSBAIJ,
1215        MatGetRowIJ_SeqSBAIJ,
1216        MatRestoreRowIJ_SeqSBAIJ,
1217        0,
1218        0,
1219        0,
1220        0,
1221        0,
1222        0,
1223        MatSetValuesBlocked_SeqSBAIJ,
1224        MatGetSubMatrix_SeqSBAIJ,
1225        0,
1226        0,
1227        MatGetMaps_Petsc,
1228        0,
1229        0,
1230        0,
1231        0,
1232        0,
1233        0,
1234        MatGetRowMax_SeqSBAIJ};
1235 
1236 EXTERN_C_BEGIN
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1239 int MatStoreValues_SeqSBAIJ(Mat mat)
1240 {
1241   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1242   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1243   int         ierr;
1244 
1245   PetscFunctionBegin;
1246   if (aij->nonew != 1) {
1247     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1248   }
1249 
1250   /* allocate space for values if not already there */
1251   if (!aij->saved_values) {
1252     ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr);
1253   }
1254 
1255   /* copy values over */
1256   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 EXTERN_C_END
1260 
1261 EXTERN_C_BEGIN
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1264 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1265 {
1266   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1267   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1268 
1269   PetscFunctionBegin;
1270   if (aij->nonew != 1) {
1271     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1272   }
1273   if (!aij->saved_values) {
1274     SETERRQ(1,"Must call MatStoreValues(A);first");
1275   }
1276 
1277   /* copy values over */
1278   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 EXTERN_C_END
1282 
1283 EXTERN_C_BEGIN
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1286 int MatCreate_SeqSBAIJ(Mat B)
1287 {
1288   Mat_SeqSBAIJ *b;
1289   int          ierr,size;
1290 
1291   PetscFunctionBegin;
1292   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1293   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1294   B->m = B->M = PetscMax(B->m,B->M);
1295   B->n = B->N = PetscMax(B->n,B->N);
1296 
1297   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1298   B->data = (void*)b;
1299   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1300   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1301   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1302   B->ops->view        = MatView_SeqSBAIJ;
1303   B->factor           = 0;
1304   B->lupivotthreshold = 1.0;
1305   B->mapping          = 0;
1306   b->row              = 0;
1307   b->icol             = 0;
1308   b->reallocs         = 0;
1309   b->saved_values     = 0;
1310 
1311   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1312   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1313 
1314   b->sorted           = PETSC_FALSE;
1315   b->roworiented      = PETSC_TRUE;
1316   b->nonew            = 0;
1317   b->diag             = 0;
1318   b->solve_work       = 0;
1319   b->mult_work        = 0;
1320   b->spptr            = 0;
1321   b->keepzeroedrows   = PETSC_FALSE;
1322 
1323   b->inew             = 0;
1324   b->jnew             = 0;
1325   b->anew             = 0;
1326   b->a2anew           = 0;
1327   b->permute          = PETSC_FALSE;
1328 
1329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1330                                      "MatStoreValues_SeqSBAIJ",
1331                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1333                                      "MatRetrieveValues_SeqSBAIJ",
1334                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1336                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1337                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 EXTERN_C_END
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1344 /*@C
1345    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1346    compressed row) format.  For good matrix assembly performance the
1347    user should preallocate the matrix storage by setting the parameter nz
1348    (or the array nnz).  By setting these parameters accurately, performance
1349    during matrix assembly can be increased by more than a factor of 50.
1350 
1351    Collective on Mat
1352 
1353    Input Parameters:
1354 +  A - the symmetric matrix
1355 .  bs - size of block
1356 .  nz - number of block nonzeros per block row (same for all rows)
1357 -  nnz - array containing the number of block nonzeros in the various block rows
1358          (possibly different for each block row) or PETSC_NULL
1359 
1360    Options Database Keys:
1361 .   -mat_no_unroll - uses code that does not unroll the loops in the
1362                      block calculations (much slower)
1363 .    -mat_block_size - size of the blocks to use
1364 
1365    Level: intermediate
1366 
1367    Notes:
1368    The block AIJ format is compatible with standard Fortran 77
1369    storage.  That is, the stored row and column indices can begin at
1370    either one (as in Fortran) or zero.  See the users' manual for details.
1371 
1372    Specify the preallocated storage with either nz or nnz (not both).
1373    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1374    allocation.  For additional details, see the users manual chapter on
1375    matrices.
1376 
1377 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1378 @*/
1379 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1380 {
1381   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1382   int          i,len,ierr,mbs,bs2;
1383   PetscTruth   flg;
1384   int          s_nz;
1385 
1386   PetscFunctionBegin;
1387   B->preallocated = PETSC_TRUE;
1388   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1389   mbs  = B->m/bs;
1390   bs2  = bs*bs;
1391 
1392   if (mbs*bs != B->m) {
1393     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1394   }
1395 
1396   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1397   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1398   if (nnz) {
1399     for (i=0; i<mbs; i++) {
1400       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1401       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1402     }
1403   }
1404 
1405   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1406   if (!flg) {
1407     switch (bs) {
1408     case 1:
1409       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1410       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1411       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1412       B->ops->mult            = MatMult_SeqSBAIJ_1;
1413       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1414       break;
1415     case 2:
1416       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1417       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1418       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1419       B->ops->mult            = MatMult_SeqSBAIJ_2;
1420       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1421       break;
1422     case 3:
1423       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1424       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1425       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1426       B->ops->mult            = MatMult_SeqSBAIJ_3;
1427       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1428       break;
1429     case 4:
1430       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1431       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1432       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1433       B->ops->mult            = MatMult_SeqSBAIJ_4;
1434       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1435       break;
1436     case 5:
1437       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1438       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1439       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1440       B->ops->mult            = MatMult_SeqSBAIJ_5;
1441       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1442       break;
1443     case 6:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1445       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1446       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1447       B->ops->mult            = MatMult_SeqSBAIJ_6;
1448       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1449       break;
1450     case 7:
1451       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1452       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1453       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1454       B->ops->mult            = MatMult_SeqSBAIJ_7;
1455       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1456       break;
1457     }
1458   }
1459 
1460   b->mbs = mbs;
1461   b->nbs = mbs;
1462   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1463   if (!nnz) {
1464     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1465     else if (nz <= 0)        nz = 1;
1466     for (i=0; i<mbs; i++) {
1467       b->imax[i] = nz;
1468     }
1469     nz = nz*mbs; /* total nz */
1470   } else {
1471     nz = 0;
1472     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1473   }
1474   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1475   s_nz = nz;
1476 
1477   /* allocate the matrix space */
1478   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1479   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1480   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1481   b->j = (int*)(b->a + s_nz*bs2);
1482   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1483   b->i = b->j + s_nz;
1484   b->singlemalloc = PETSC_TRUE;
1485 
1486   /* pointer to beginning of each row */
1487   b->i[0] = 0;
1488   for (i=1; i<mbs+1; i++) {
1489     b->i[i] = b->i[i-1] + b->imax[i-1];
1490   }
1491 
1492   /* b->ilen will count nonzeros in each block row so far. */
1493   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1494   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1495   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1496 
1497   b->bs               = bs;
1498   b->bs2              = bs2;
1499   b->s_nz             = 0;
1500   b->s_maxnz          = s_nz*bs2;
1501 
1502   b->inew             = 0;
1503   b->jnew             = 0;
1504   b->anew             = 0;
1505   b->a2anew           = 0;
1506   b->permute          = PETSC_FALSE;
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatCreateSeqSBAIJ"
1513 /*@C
1514    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1515    compressed row) format.  For good matrix assembly performance the
1516    user should preallocate the matrix storage by setting the parameter nz
1517    (or the array nnz).  By setting these parameters accurately, performance
1518    during matrix assembly can be increased by more than a factor of 50.
1519 
1520    Collective on MPI_Comm
1521 
1522    Input Parameters:
1523 +  comm - MPI communicator, set to PETSC_COMM_SELF
1524 .  bs - size of block
1525 .  m - number of rows, or number of columns
1526 .  nz - number of block nonzeros per block row (same for all rows)
1527 -  nnz - array containing the number of block nonzeros in the various block rows
1528          (possibly different for each block row) or PETSC_NULL
1529 
1530    Output Parameter:
1531 .  A - the symmetric matrix
1532 
1533    Options Database Keys:
1534 .   -mat_no_unroll - uses code that does not unroll the loops in the
1535                      block calculations (much slower)
1536 .    -mat_block_size - size of the blocks to use
1537 
1538    Level: intermediate
1539 
1540    Notes:
1541    The block AIJ format is compatible with standard Fortran 77
1542    storage.  That is, the stored row and column indices can begin at
1543    either one (as in Fortran) or zero.  See the users' manual for details.
1544 
1545    Specify the preallocated storage with either nz or nnz (not both).
1546    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1547    allocation.  For additional details, see the users manual chapter on
1548    matrices.
1549 
1550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1551 @*/
1552 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1553 {
1554   int ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1558   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1559   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1565 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1566 {
1567   Mat         C;
1568   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1569   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1570 
1571   PetscFunctionBegin;
1572   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1573 
1574   *B = 0;
1575   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1576   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1577   c    = (Mat_SeqSBAIJ*)C->data;
1578 
1579   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1580   C->preallocated   = PETSC_TRUE;
1581   C->factor         = A->factor;
1582   c->row            = 0;
1583   c->icol           = 0;
1584   c->saved_values   = 0;
1585   c->keepzeroedrows = a->keepzeroedrows;
1586   C->assembled      = PETSC_TRUE;
1587 
1588   c->bs         = a->bs;
1589   c->bs2        = a->bs2;
1590   c->mbs        = a->mbs;
1591   c->nbs        = a->nbs;
1592 
1593   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1594   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1595   for (i=0; i<mbs; i++) {
1596     c->imax[i] = a->imax[i];
1597     c->ilen[i] = a->ilen[i];
1598   }
1599 
1600   /* allocate the matrix space */
1601   c->singlemalloc = PETSC_TRUE;
1602   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1603   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1604   c->j = (int*)(c->a + nz*bs2);
1605   c->i = c->j + nz;
1606   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1607   if (mbs > 0) {
1608     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1609     if (cpvalues == MAT_COPY_VALUES) {
1610       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1611     } else {
1612       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1613     }
1614   }
1615 
1616   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1617   c->sorted      = a->sorted;
1618   c->roworiented = a->roworiented;
1619   c->nonew       = a->nonew;
1620 
1621   if (a->diag) {
1622     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1623     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1624     for (i=0; i<mbs; i++) {
1625       c->diag[i] = a->diag[i];
1626     }
1627   } else c->diag        = 0;
1628   c->s_nz               = a->s_nz;
1629   c->s_maxnz            = a->s_maxnz;
1630   c->solve_work         = 0;
1631   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1632   c->mult_work          = 0;
1633   *B = C;
1634   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 EXTERN_C_BEGIN
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1641 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1642 {
1643   Mat_SeqSBAIJ *a;
1644   Mat          B;
1645   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1646   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1647   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1648   int          *masked,nmask,tmp,bs2,ishift;
1649   Scalar       *aa;
1650   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1651 
1652   PetscFunctionBegin;
1653   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1654   bs2  = bs*bs;
1655 
1656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1657   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1658   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1659   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1660   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1661   M = header[1]; N = header[2]; nz = header[3];
1662 
1663   if (header[3] < 0) {
1664     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1665   }
1666 
1667   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1668 
1669   /*
1670      This code adds extra rows to make sure the number of rows is
1671     divisible by the blocksize
1672   */
1673   mbs        = M/bs;
1674   extra_rows = bs - M + bs*(mbs);
1675   if (extra_rows == bs) extra_rows = 0;
1676   else                  mbs++;
1677   if (extra_rows) {
1678     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1679   }
1680 
1681   /* read in row lengths */
1682   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1683   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1684   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1685 
1686   /* read in column indices */
1687   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1688   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1689   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1690 
1691   /* loop over row lengths determining block row lengths */
1692   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1693   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1694   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1695   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1696   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1697   masked   = mask + mbs;
1698   rowcount = 0; nzcount = 0;
1699   for (i=0; i<mbs; i++) {
1700     nmask = 0;
1701     for (j=0; j<bs; j++) {
1702       kmax = rowlengths[rowcount];
1703       for (k=0; k<kmax; k++) {
1704         tmp = jj[nzcount++]/bs;   /* block col. index */
1705         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1706       }
1707       rowcount++;
1708     }
1709     s_browlengths[i] += nmask;
1710     browlengths[i]    = 2*s_browlengths[i];
1711 
1712     /* zero out the mask elements we set */
1713     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1714   }
1715 
1716   /* create our matrix */
1717   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1718   B = *A;
1719   a = (Mat_SeqSBAIJ*)B->data;
1720 
1721   /* set matrix "i" values */
1722   a->i[0] = 0;
1723   for (i=1; i<= mbs; i++) {
1724     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1725     a->ilen[i-1] = s_browlengths[i-1];
1726   }
1727   a->s_nz         = 0;
1728   for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1729 
1730   /* read in nonzero values */
1731   ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr);
1732   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1733   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1734 
1735   /* set "a" and "j" values into matrix */
1736   nzcount = 0; jcount = 0;
1737   for (i=0; i<mbs; i++) {
1738     nzcountb = nzcount;
1739     nmask    = 0;
1740     for (j=0; j<bs; j++) {
1741       kmax = rowlengths[i*bs+j];
1742       for (k=0; k<kmax; k++) {
1743         tmp = jj[nzcount++]/bs; /* block col. index */
1744         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1745       }
1746     }
1747     /* sort the masked values */
1748     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1749 
1750     /* set "j" values into matrix */
1751     maskcount = 1;
1752     for (j=0; j<nmask; j++) {
1753       a->j[jcount++]  = masked[j];
1754       mask[masked[j]] = maskcount++;
1755     }
1756 
1757     /* set "a" values into matrix */
1758     ishift = bs2*a->i[i];
1759     for (j=0; j<bs; j++) {
1760       kmax = rowlengths[i*bs+j];
1761       for (k=0; k<kmax; k++) {
1762         tmp       = jj[nzcountb]/bs ; /* block col. index */
1763         if (tmp >= i){
1764           block     = mask[tmp] - 1;
1765           point     = jj[nzcountb] - bs*tmp;
1766           idx       = ishift + bs2*block + j + bs*point;
1767           a->a[idx] = aa[nzcountb];
1768         }
1769         nzcountb++;
1770       }
1771     }
1772     /* zero out the mask elements we set */
1773     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1774   }
1775   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1776 
1777   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1778   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1779   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1780   ierr = PetscFree(aa);CHKERRQ(ierr);
1781   ierr = PetscFree(jj);CHKERRQ(ierr);
1782   ierr = PetscFree(mask);CHKERRQ(ierr);
1783 
1784   B->assembled = PETSC_TRUE;
1785   ierr = MatView_Private(B);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 EXTERN_C_END
1789 
1790 
1791 
1792