xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 8549e40251be5a97b98773afce3ad548353ba44c)
1 /*$Id: sbaij.c,v 1.36 2000/10/12 18:19:32 hzhang Exp hzhang $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                            /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 #include "src/mat/impls/sbaij/seq/sbaij.h"
12 
13 #define CHUNKSIZE  10
14 
15 /*
16      Checks for missing diagonals
17 */
18 #undef __FUNC__
19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ"
20 int MatMissingDiagonal_SeqSBAIJ(Mat A)
21 {
22   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23   int         *diag,*jj = a->j,i,ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
27   diag = a->diag;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
31     }
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNC__
37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ"
38 int MatMarkDiagonal_SeqSBAIJ(Mat A)
39 {
40   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
41   int          i,j,*diag,m = a->mbs;
42 
43   PetscFunctionBegin;
44   if (a->diag) PetscFunctionReturn(0);
45 
46   diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag);
47   PLogObjectMemory(A,(m+1)*sizeof(int));
48   for (i=0; i<m; i++) {
49     diag[i] = a->i[i+1];
50     for (j=a->i[i]; j<a->i[i+1]; j++) {
51       if (a->j[j] == i) {
52         diag[i] = j;
53         break;
54       }
55     }
56   }
57   a->diag = diag;
58   PetscFunctionReturn(0);
59 }
60 
61 
62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
63 
64 #undef __FUNC__
65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ"
66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
67 {
68   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69 
70   PetscFunctionBegin;
71   if (ia) {
72     SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
73   }
74   *nn = a->mbs;
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNC__
79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ"
80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
81 {
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #undef __FUNC__
89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ"
90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
91 {
92   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
93 
94   PetscFunctionBegin;
95   *bs = sbaij->bs;
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNC__
100 #define __FUNC__ "MatDestroy_SeqSBAIJ"
101 int MatDestroy_SeqSBAIJ(Mat A)
102 {
103   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
104   int         ierr;
105 
106   PetscFunctionBegin;
107   if (A->mapping) {
108     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
109   }
110   if (A->bmapping) {
111     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
112   }
113   if (A->rmap) {
114     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
115   }
116   if (A->cmap) {
117     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
118   }
119 #if defined(PETSC_USE_LOG)
120   PLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
121 #endif
122   ierr = PetscFree(a->a);CHKERRQ(ierr);
123   if (!a->singlemalloc) {
124     ierr = PetscFree(a->i);CHKERRQ(ierr);
125     ierr = PetscFree(a->j);CHKERRQ(ierr);
126   }
127   if (a->row) {
128     ierr = ISDestroy(a->row);CHKERRQ(ierr);
129   }
130   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
131   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
132   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
133   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
134   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
135   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
136   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
137 
138   if (a->inew){
139     ierr = PetscFree(a->inew);CHKERRQ(ierr);
140     a->inew = 0;
141   }
142   ierr = PetscFree(a);CHKERRQ(ierr);
143   PLogObjectDestroy(A);
144   PetscHeaderDestroy(A);
145   PetscFunctionReturn(0);
146 }
147 
148 #undef __FUNC__
149 #define __FUNC__ "MatSetOption_SeqSBAIJ"
150 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
151 {
152   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
153 
154   PetscFunctionBegin;
155   if      (op == MAT_ROW_ORIENTED)                 a->roworiented    = PETSC_TRUE;
156   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented    = PETSC_FALSE;
157   else if (op == MAT_COLUMNS_SORTED)               a->sorted         = PETSC_TRUE;
158   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted         = PETSC_FALSE;
159   else if (op == MAT_KEEP_ZEROED_ROWS)             a->keepzeroedrows = PETSC_TRUE;
160   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew          = 1;
161   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew          = -1;
162   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew          = -2;
163   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew          = 0;
164   else if (op == MAT_ROWS_SORTED ||
165            op == MAT_ROWS_UNSORTED ||
166            op == MAT_SYMMETRIC ||
167            op == MAT_STRUCTURALLY_SYMMETRIC ||
168            op == MAT_YES_NEW_DIAGONALS ||
169            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
170            op == MAT_USE_HASH_TABLE) {
171     PLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
172   } else if (op == MAT_NO_NEW_DIAGONALS) {
173     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
174   } else {
175     SETERRQ(PETSC_ERR_SUP,"unknown option");
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNC__
181 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ"
182 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n)
183 {
184   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
185 
186   PetscFunctionBegin;
187   *m = 0;
188   *n = A->m;
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNC__
193 #define __FUNC__ "MatGetRow_SeqSBAIJ"
194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v)
195 {
196   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
197   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
198   MatScalar    *aa,*aa_i;
199   Scalar       *v_i;
200 
201   PetscFunctionBegin;
202   bs  = a->bs;
203   ai  = a->i;
204   aj  = a->j;
205   aa  = a->a;
206   bs2 = a->bs2;
207 
208   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
209 
210   bn  = row/bs;   /* Block number */
211   bp  = row % bs; /* Block position */
212   M   = ai[bn+1] - ai[bn];
213   *ncols = bs*M;
214 
215   if (v) {
216     *v = 0;
217     if (*ncols) {
218       *v = (Scalar*)PetscMalloc((*ncols+row)*sizeof(Scalar));CHKPTRQ(*v);
219       for (i=0; i<M; i++) { /* for each block in the block row */
220         v_i  = *v + i*bs;
221         aa_i = aa + bs2*(ai[bn] + i);
222         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
223       }
224     }
225   }
226 
227   if (cols) {
228     *cols = 0;
229     if (*ncols) {
230       *cols = (int*)PetscMalloc((*ncols+row)*sizeof(int));CHKPTRQ(*cols);
231       for (i=0; i<M; i++) { /* for each block in the block row */
232         cols_i = *cols + i*bs;
233         itmp  = bs*aj[ai[bn] + i];
234         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
235       }
236     }
237   }
238 
239   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
240   /* this segment is currently removed, so only entries in the upper triangle are obtained */
241 #ifdef column_search
242   v_i    = *v    + M*bs;
243   cols_i = *cols + M*bs;
244   for (i=0; i<bn; i++){ /* for each block row */
245     M = ai[i+1] - ai[i];
246     for (j=0; j<M; j++){
247       itmp = aj[ai[i] + j];    /* block column value */
248       if (itmp == bn){
249         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
250         for (k=0; k<bs; k++) {
251           *cols_i++ = i*bs+k;
252           *v_i++    = aa_i[k];
253         }
254         *ncols += bs;
255         break;
256       }
257     }
258   }
259 #endif
260 
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNC__
265 #define __FUNC__ "MatRestoreRow_SeqSBAIJ"
266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
267 {
268   int ierr;
269 
270   PetscFunctionBegin;
271   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
272   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatTranspose_SeqSBAIJ"
278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
279 {
280   PetscFunctionBegin;
281   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
282   /* PetscFunctionReturn(0); */
283 }
284 
285 #undef __FUNC__
286 #define __FUNC__ "MatView_SeqSBAIJ_Binary"
287 static int MatView_SeqSBAIJ_Binary(Mat A,Viewer viewer)
288 {
289   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
290   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
291   Scalar      *aa;
292   FILE        *file;
293 
294   PetscFunctionBegin;
295   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
296   col_lens = (int*)PetscMalloc((4+A->m)*sizeof(int));CHKPTRQ(col_lens);
297   col_lens[0] = MAT_COOKIE;
298 
299   col_lens[1] = A->m;
300   col_lens[2] = A->m;
301   col_lens[3] = a->s_nz*bs2;
302 
303   /* store lengths of each row and write (including header) to file */
304   count = 0;
305   for (i=0; i<a->mbs; i++) {
306     for (j=0; j<bs; j++) {
307       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
308     }
309   }
310   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
311   ierr = PetscFree(col_lens);CHKERRQ(ierr);
312 
313   /* store column indices (zero start index) */
314   jj = (int*)PetscMalloc((a->s_nz+1)*bs2*sizeof(int));CHKPTRQ(jj);
315   count = 0;
316   for (i=0; i<a->mbs; i++) {
317     for (j=0; j<bs; j++) {
318       for (k=a->i[i]; k<a->i[i+1]; k++) {
319         for (l=0; l<bs; l++) {
320           jj[count++] = bs*a->j[k] + l;
321         }
322       }
323     }
324   }
325   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
326   ierr = PetscFree(jj);CHKERRQ(ierr);
327 
328   /* store nonzero values */
329   aa = (Scalar*)PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
330   count = 0;
331   for (i=0; i<a->mbs; i++) {
332     for (j=0; j<bs; j++) {
333       for (k=a->i[i]; k<a->i[i+1]; k++) {
334         for (l=0; l<bs; l++) {
335           aa[count++] = a->a[bs2*k + l*bs + j];
336         }
337       }
338     }
339   }
340   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
341   ierr = PetscFree(aa);CHKERRQ(ierr);
342 
343   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
344   if (file) {
345     fprintf(file,"-matload_block_size %d\n",a->bs);
346   }
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNC__
351 #define __FUNC__ "MatView_SeqSBAIJ_ASCII"
352 static int MatView_SeqSBAIJ_ASCII(Mat A,Viewer viewer)
353 {
354   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
355   int         ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2;
356   char        *outputname;
357 
358   PetscFunctionBegin;
359   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
360   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
361   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
362     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
363   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
364     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
365   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
366     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
367     for (i=0; i<a->mbs; i++) {
368       for (j=0; j<bs; j++) {
369         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
370         for (k=a->i[i]; k<a->i[i+1]; k++) {
371           for (l=0; l<bs; l++) {
372 #if defined(PETSC_USE_COMPLEX)
373             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
374               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
375                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
377               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
378                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
380               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
381             }
382 #else
383             if (a->a[bs2*k + l*bs + j] != 0.0) {
384               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
385             }
386 #endif
387           }
388         }
389         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
390       }
391     }
392     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
393   } else {
394     ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
395     for (i=0; i<a->mbs; i++) {
396       for (j=0; j<bs; j++) {
397         ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
398         for (k=a->i[i]; k<a->i[i+1]; k++) {
399           for (l=0; l<bs; l++) {
400 #if defined(PETSC_USE_COMPLEX)
401             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
402               ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
403                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
404             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
405               ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
406                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             } else {
408               ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
409             }
410 #else
411             ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
412 #endif
413           }
414         }
415         ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
416       }
417     }
418     ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
419   }
420   ierr = ViewerFlush(viewer);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNC__
425 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom"
426 static int MatView_SeqSBAIJ_Draw_Zoom(Draw draw,void *Aa)
427 {
428   Mat          A = (Mat) Aa;
429   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
430   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
431   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
432   MatScalar    *aa;
433   MPI_Comm     comm;
434   Viewer       viewer;
435 
436   PetscFunctionBegin;
437   /*
438       This is nasty. If this is called from an originally parallel matrix
439    then all processes call this,but only the first has the matrix so the
440    rest should return immediately.
441   */
442   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
443   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
444   if (rank) PetscFunctionReturn(0);
445 
446   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
447 
448   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
449   DrawString(draw, .3*(xl+xr), .3*(yl+yr), DRAW_BLACK, "symmetric");
450 
451   /* loop over matrix elements drawing boxes */
452   color = DRAW_BLUE;
453   for (i=0,row=0; i<mbs; i++,row+=bs) {
454     for (j=a->i[i]; j<a->i[i+1]; j++) {
455       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
456       x_l = a->j[j]*bs; x_r = x_l + 1.0;
457       aa = a->a + j*bs2;
458       for (k=0; k<bs; k++) {
459         for (l=0; l<bs; l++) {
460           if (PetscRealPart(*aa++) >=  0.) continue;
461           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
462         }
463       }
464     }
465   }
466   color = DRAW_CYAN;
467   for (i=0,row=0; i<mbs; i++,row+=bs) {
468     for (j=a->i[i]; j<a->i[i+1]; j++) {
469       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
470       x_l = a->j[j]*bs; x_r = x_l + 1.0;
471       aa = a->a + j*bs2;
472       for (k=0; k<bs; k++) {
473         for (l=0; l<bs; l++) {
474           if (PetscRealPart(*aa++) != 0.) continue;
475           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
476         }
477       }
478     }
479   }
480 
481   color = DRAW_RED;
482   for (i=0,row=0; i<mbs; i++,row+=bs) {
483     for (j=a->i[i]; j<a->i[i+1]; j++) {
484       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
485       x_l = a->j[j]*bs; x_r = x_l + 1.0;
486       aa = a->a + j*bs2;
487       for (k=0; k<bs; k++) {
488         for (l=0; l<bs; l++) {
489           if (PetscRealPart(*aa++) <= 0.) continue;
490           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
491         }
492       }
493     }
494   }
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNC__
499 #define __FUNC__ "MatView_SeqSBAIJ_Draw"
500 static int MatView_SeqSBAIJ_Draw(Mat A,Viewer viewer)
501 {
502   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)A->data;
503   int          ierr;
504   PetscReal    xl,yl,xr,yr,w,h;
505   Draw         draw;
506   PetscTruth   isnull;
507 
508   PetscFunctionBegin;
509 
510   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
511   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
512 
513   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
514   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
515   xr += w;    yr += h;  xl = -w;     yl = -h;
516   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
517   ierr = DrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
518   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNC__
523 #define __FUNC__ "MatView_SeqSBAIJ"
524 int MatView_SeqSBAIJ(Mat A,Viewer viewer)
525 {
526   int        ierr;
527   PetscTruth issocket,isascii,isbinary,isdraw;
528 
529   PetscFunctionBegin;
530   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
531   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
534   if (issocket) {
535     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
536   } else if (isascii){
537     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
538   } else if (isbinary) {
539     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
540   } else if (isdraw) {
541     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
542   } else {
543     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 
549 #undef __FUNC__
550 #define __FUNC__ "MatGetValues_SeqSBAIJ"
551 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
552 {
553   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
554   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
555   int        *ai = a->i,*ailen = a->ilen;
556   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
557   MatScalar  *ap,*aa = a->a,zero = 0.0;
558 
559   PetscFunctionBegin;
560   for (k=0; k<m; k++) { /* loop over rows */
561     row  = im[k]; brow = row/bs;
562     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
563     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
564     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
565     nrow = ailen[brow];
566     for (l=0; l<n; l++) { /* loop over columns */
567       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
568       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
569       col  = in[l] ;
570       bcol = col/bs;
571       cidx = col%bs;
572       ridx = row%bs;
573       high = nrow;
574       low  = 0; /* assume unsorted */
575       while (high-low > 5) {
576         t = (low+high)/2;
577         if (rp[t] > bcol) high = t;
578         else             low  = t;
579       }
580       for (i=low; i<high; i++) {
581         if (rp[i] > bcol) break;
582         if (rp[i] == bcol) {
583           *v++ = ap[bs2*i+bs*cidx+ridx];
584           goto finished;
585         }
586       }
587       *v++ = zero;
588       finished:;
589     }
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 
595 #undef __FUNC__
596 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ"
597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
598 {
599   PetscFunctionBegin;
600   SETERRQ(1,"Function not yet written for SBAIJ format");
601   /* PetscFunctionReturn(0); */
602 }
603 
604 #undef __FUNC__
605 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ"
606 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
607 {
608   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
609   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
610   int        m = A->m,*ip,N,*ailen = a->ilen;
611   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
612   MatScalar  *aa = a->a,*ap;
613 
614   PetscFunctionBegin;
615   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
616 
617   if (m) rmax = ailen[0];
618   for (i=1; i<mbs; i++) {
619     /* move each row back by the amount of empty slots (fshift) before it*/
620     fshift += imax[i-1] - ailen[i-1];
621     rmax   = PetscMax(rmax,ailen[i]);
622     if (fshift) {
623       ip = aj + ai[i]; ap = aa + bs2*ai[i];
624       N = ailen[i];
625       for (j=0; j<N; j++) {
626         ip[j-fshift] = ip[j];
627         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
628       }
629     }
630     ai[i] = ai[i-1] + ailen[i-1];
631   }
632   if (mbs) {
633     fshift += imax[mbs-1] - ailen[mbs-1];
634     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
635   }
636   /* reset ilen and imax for each row */
637   for (i=0; i<mbs; i++) {
638     ailen[i] = imax[i] = ai[i+1] - ai[i];
639   }
640   a->s_nz = ai[mbs];
641 
642   /* diagonals may have moved, so kill the diagonal pointers */
643   if (fshift && a->diag) {
644     ierr = PetscFree(a->diag);CHKERRQ(ierr);
645     PLogObjectMemory(A,-(m+1)*sizeof(int));
646     a->diag = 0;
647   }
648   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
649            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
650   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
651            a->reallocs);
652   PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
653   a->reallocs          = 0;
654   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
655 
656   PetscFunctionReturn(0);
657 }
658 
659 /*
660    This function returns an array of flags which indicate the locations of contiguous
661    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
662    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
663    Assume: sizes should be long enough to hold all the values.
664 */
665 #undef __FUNC__
666 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
667 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
668 {
669   int        i,j,k,row;
670   PetscTruth flg;
671 
672   PetscFunctionBegin;
673   for (i=0,j=0; i<n; j++) {
674     row = idx[i];
675     if (row%bs!=0) { /* Not the begining of a block */
676       sizes[j] = 1;
677       i++;
678     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
679       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
680       i++;
681     } else { /* Begining of the block, so check if the complete block exists */
682       flg = PETSC_TRUE;
683       for (k=1; k<bs; k++) {
684         if (row+k != idx[i+k]) { /* break in the block */
685           flg = PETSC_FALSE;
686           break;
687         }
688       }
689       if (flg == PETSC_TRUE) { /* No break in the bs */
690         sizes[j] = bs;
691         i+= bs;
692       } else {
693         sizes[j] = 1;
694         i++;
695       }
696     }
697   }
698   *bs_max = j;
699   PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNC__
703 #define __FUNC__ "MatZeroRows_SeqSBAIJ"
704 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag)
705 {
706   Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
707   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
708   int         bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
709   Scalar      zero = 0.0;
710   MatScalar   *aa;
711 
712   PetscFunctionBegin;
713   /* Make a copy of the IS and  sort it */
714   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
715   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
716 
717   /* allocate memory for rows,sizes */
718   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
719   sizes = rows + is_n;
720 
721   /* initialize copy IS values to rows, and sort them */
722   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
723   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
724   if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
725     for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
726     bs_max = is_n;            /* bs_max: num. of contiguous block row in the row */
727   } else {
728     ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
729   }
730   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
731 
732   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
733     row   = rows[j];                  /* row to be zeroed */
734     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
735     count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
736     aa    = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
737     if (sizes[i] == bs && !sbaij->keepzeroedrows) {
738       if (diag) {
739         if (sbaij->ilen[row/bs] > 0) {
740           sbaij->ilen[row/bs] = 1;
741           sbaij->j[sbaij->i[row/bs]] = row/bs;
742           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
743         }
744         /* Now insert all the diagoanl values for this bs */
745         for (k=0; k<bs; k++) {
746           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
747         }
748       } else { /* (!diag) */
749         sbaij->ilen[row/bs] = 0;
750       } /* end (!diag) */
751     } else { /* (sizes[i] != bs), broken block */
752 #if defined (PETSC_USE_DEBUG)
753       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
754 #endif
755       for (k=0; k<count; k++) {
756         aa[0] = zero;
757         aa+=bs;
758       }
759       if (diag) {
760         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
761       }
762     }
763   }
764 
765   ierr = PetscFree(rows);CHKERRQ(ierr);
766   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 /* Only add/insert a(i,j) with i<=j (blocks).
772    Any a(i,j) with i>j input by user is ingored.
773 */
774 
775 #undef __FUNC__
776 #define __FUNC__ "MatSetValues_SeqSBAIJ"
777 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
778 {
779   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
780   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
781   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
782   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
783   int         ridx,cidx,bs2=a->bs2,ierr;
784   MatScalar   *ap,value,*aa=a->a,*bap;
785 
786   PetscFunctionBegin;
787 
788   for (k=0; k<m; k++) { /* loop over added rows */
789     row  = im[k];       /* row number */
790     brow = row/bs;      /* block row number */
791     if (row < 0) continue;
792 #if defined(PETSC_USE_BOPT_g)
793     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
794 #endif
795     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
796     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
797     rmax = imax[brow];  /* maximum space allocated for this row */
798     nrow = ailen[brow]; /* actual length of this row */
799     low  = 0;
800 
801     for (l=0; l<n; l++) { /* loop over added columns */
802       if (in[l] < 0) continue;
803 #if defined(PETSC_USE_BOPT_g)
804       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
805 #endif
806       col = in[l];
807       bcol = col/bs;              /* block col number */
808 
809       if (brow <= bcol){
810         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
811         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
812           /* element value a(k,l) */
813           if (roworiented) {
814             value = v[l + k*n];
815           } else {
816             value = v[k + l*m];
817           }
818 
819           /* move pointer bap to a(k,l) quickly and add/insert value */
820           if (!sorted) low = 0; high = nrow;
821           while (high-low > 7) {
822             t = (low+high)/2;
823             if (rp[t] > bcol) high = t;
824             else              low  = t;
825           }
826           for (i=low; i<high; i++) {
827             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
828             if (rp[i] > bcol) break;
829             if (rp[i] == bcol) {
830               bap  = ap +  bs2*i + bs*cidx + ridx;
831               if (is == ADD_VALUES) *bap += value;
832               else                  *bap  = value;
833               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
834               if (brow == bcol && ridx < cidx){
835                 bap  = ap +  bs2*i + bs*ridx + cidx;
836                 if (is == ADD_VALUES) *bap += value;
837                 else                  *bap  = value;
838               }
839               goto noinsert1;
840             }
841           }
842 
843       if (nonew == 1) goto noinsert1;
844       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
845       if (nrow >= rmax) {
846         /* there is no extra room in row, therefore enlarge */
847         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
848         MatScalar *new_a;
849 
850         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
851 
852         /* Malloc new storage space */
853         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
854         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a);
855         new_j   = (int*)(new_a + bs2*new_nz);
856         new_i   = new_j + new_nz;
857 
858         /* copy over old data into new slots */
859         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
860         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
861         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
862         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
863         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
864         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
865         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
866         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
867         /* free up old matrix storage */
868         ierr = PetscFree(a->a);CHKERRQ(ierr);
869         if (!a->singlemalloc) {
870           ierr = PetscFree(a->i);CHKERRQ(ierr);
871           ierr = PetscFree(a->j);CHKERRQ(ierr);
872         }
873         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
874         a->singlemalloc = PETSC_TRUE;
875 
876         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
877         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
878         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
879         a->s_maxnz += bs2*CHUNKSIZE;
880         a->reallocs++;
881         a->s_nz++;
882       }
883 
884       N = nrow++ - 1;
885       /* shift up all the later entries in this row */
886       for (ii=N; ii>=i; ii--) {
887         rp[ii+1] = rp[ii];
888         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
889       }
890       if (N>=i) {
891         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
892       }
893       rp[i]                      = bcol;
894       ap[bs2*i + bs*cidx + ridx] = value;
895       noinsert1:;
896       low = i;
897       /* } */
898         }
899       } /* end of if .. if..  */
900     }                     /* end of loop over added columns */
901     ailen[brow] = nrow;
902   }                       /* end of loop over added rows */
903 
904   PetscFunctionReturn(0);
905 }
906 
907 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
908 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
909 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
910 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
911 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
912 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
913 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
914 extern int MatScale_SeqSBAIJ(Scalar*,Mat);
915 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
916 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
917 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
918 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
919 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
920 extern int MatZeroEntries_SeqSBAIJ(Mat);
921 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
922 
923 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
924 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
925 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
926 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
927 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
928 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
929 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
930 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
931 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
932 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
933 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
934 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
935 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
936 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
937 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
938 
939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
940 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
941 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
942 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
945 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
946 
947 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
948 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
949 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
950 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
951 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
952 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
953 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
954 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
955 
956 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
957 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
958 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
959 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
960 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
961 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
962 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
963 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
964 
965 #ifdef HAVE_MatIncompleteCholeskyFactor
966 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
967 #undef __FUNC__
968 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ"
969 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
970 {
971   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
972   Mat         outA;
973   int         ierr;
974   PetscTruth  row_identity,col_identity;
975 
976   PetscFunctionBegin;
977   /*
978   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
979   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
980   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
981   if (!row_identity || !col_identity) {
982     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
983   }
984   */
985 
986   outA          = inA;
987   inA->factor   = FACTOR_LU;
988 
989   if (!a->diag) {
990     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
991   }
992   /*
993       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
994       for ILU(0) factorization with natural ordering
995   */
996   switch (a->bs) {
997   case 1:
998     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_2_NaturalOrdering; /* 2? */
999     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1000   case 2:
1001     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1002     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1003     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1004     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1005     break;
1006   case 3:
1007     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1008     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1009     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1010     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1011     break;
1012   case 4:
1013     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1014     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1015     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1016     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1017     break;
1018   case 5:
1019     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1020     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1021     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1022     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1023     break;
1024   case 6:
1025     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1026     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1027     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1028     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1029     break;
1030   case 7:
1031     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1032     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1033     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1034     PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1035     break;
1036   default:
1037     a->row        = row;
1038     a->icol       = col;
1039     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1040     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1041 
1042     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1043     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1044     PLogObjectParent(inA,a->icol);
1045 
1046     if (!a->solve_work) {
1047       a->solve_work = (Scalar*)PetscMalloc((A->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1048       PLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar));
1049     }
1050   }
1051 
1052   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 #endif
1057 
1058 #undef __FUNC__
1059 #define __FUNC__ "MatPrintHelp_SeqSBAIJ"
1060 int MatPrintHelp_SeqSBAIJ(Mat A)
1061 {
1062   static PetscTruth called = PETSC_FALSE;
1063   MPI_Comm          comm = A->comm;
1064   int               ierr;
1065 
1066   PetscFunctionBegin;
1067   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1068   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1069   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 EXTERN_C_BEGIN
1074 #undef __FUNC__
1075 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1076 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1077 {
1078   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1079   int         i,nz,n;
1080 
1081   PetscFunctionBegin;
1082   nz = baij->s_maxnz;
1083   n  = mat->n;
1084   for (i=0; i<nz; i++) {
1085     baij->j[i] = indices[i];
1086   }
1087   baij->s_nz = nz;
1088   for (i=0; i<n; i++) {
1089     baij->ilen[i] = baij->imax[i];
1090   }
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 EXTERN_C_END
1095 
1096 #undef __FUNC__
1097 #define __FUNC__ "MatSeqSBAIJSetColumnIndices"
1098 /*@
1099     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1100        in the matrix.
1101 
1102   Input Parameters:
1103 +  mat     - the SeqSBAIJ matrix
1104 -  indices - the column indices
1105 
1106   Level: advanced
1107 
1108   Notes:
1109     This can be called if you have precomputed the nonzero structure of the
1110   matrix and want to provide it to the matrix object to improve the performance
1111   of the MatSetValues() operation.
1112 
1113     You MUST have set the correct numbers of nonzeros per row in the call to
1114   MatCreateSeqSBAIJ().
1115 
1116     MUST be called before any calls to MatSetValues()
1117 
1118 .seealso: MatCreateSeqSBAIJ
1119 @*/
1120 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1121 {
1122   int ierr,(*f)(Mat,int *);
1123 
1124   PetscFunctionBegin;
1125   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1126   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1127   if (f) {
1128     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1129   } else {
1130     SETERRQ(1,"Wrong type of matrix to set column indices");
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 /* -------------------------------------------------------------------*/
1136 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1137        MatGetRow_SeqSBAIJ,
1138        MatRestoreRow_SeqSBAIJ,
1139        MatMult_SeqSBAIJ_N,
1140        MatMultAdd_SeqSBAIJ_N,
1141        MatMultTranspose_SeqSBAIJ,
1142        MatMultTransposeAdd_SeqSBAIJ,
1143        MatSolve_SeqSBAIJ_N,
1144        0,
1145        0,
1146        0,
1147        0,
1148        MatCholeskyFactor_SeqSBAIJ,
1149        0,
1150        MatTranspose_SeqSBAIJ,
1151        MatGetInfo_SeqSBAIJ,
1152        MatEqual_SeqSBAIJ,
1153        MatGetDiagonal_SeqSBAIJ,
1154        MatDiagonalScale_SeqSBAIJ,
1155        MatNorm_SeqSBAIJ,
1156        0,
1157        MatAssemblyEnd_SeqSBAIJ,
1158        0,
1159        MatSetOption_SeqSBAIJ,
1160        MatZeroEntries_SeqSBAIJ,
1161        MatZeroRows_SeqSBAIJ,
1162        0,
1163        0,
1164        MatCholeskyFactorSymbolic_SeqSBAIJ,
1165        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1166        0,
1167        0,
1168        MatGetOwnershipRange_SeqSBAIJ,
1169        0,
1170        MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ,
1171        0,
1172        0,
1173        MatDuplicate_SeqSBAIJ,
1174        0,
1175        0,
1176        0,
1177        0,
1178        0,
1179        MatGetSubMatrices_SeqSBAIJ,
1180        MatIncreaseOverlap_SeqSBAIJ,
1181        MatGetValues_SeqSBAIJ,
1182        0,
1183        MatPrintHelp_SeqSBAIJ,
1184        MatScale_SeqSBAIJ,
1185        0,
1186        0,
1187        0,
1188        MatGetBlockSize_SeqSBAIJ,
1189        MatGetRowIJ_SeqSBAIJ,
1190        MatRestoreRowIJ_SeqSBAIJ,
1191        0,
1192        0,
1193        0,
1194        0,
1195        0,
1196        0,
1197        MatSetValuesBlocked_SeqSBAIJ,
1198        MatGetSubMatrix_SeqSBAIJ,
1199        0,
1200        0,
1201        MatGetMaps_Petsc,
1202        0,
1203        0,
1204        0,
1205        0,
1206        0,
1207        0,
1208        MatGetRowMax_SeqSBAIJ};
1209 
1210 EXTERN_C_BEGIN
1211 #undef __FUNC__
1212 #define __FUNC__ "MatStoreValues_SeqSBAIJ"
1213 int MatStoreValues_SeqSBAIJ(Mat mat)
1214 {
1215   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1216   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1217   int         ierr;
1218 
1219   PetscFunctionBegin;
1220   if (aij->nonew != 1) {
1221     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1222   }
1223 
1224   /* allocate space for values if not already there */
1225   if (!aij->saved_values) {
1226     aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1227   }
1228 
1229   /* copy values over */
1230   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 EXTERN_C_END
1234 
1235 EXTERN_C_BEGIN
1236 #undef __FUNC__
1237 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ"
1238 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1239 {
1240   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1241   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1242 
1243   PetscFunctionBegin;
1244   if (aij->nonew != 1) {
1245     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1246   }
1247   if (!aij->saved_values) {
1248     SETERRQ(1,"Must call MatStoreValues(A);first");
1249   }
1250 
1251   /* copy values over */
1252   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 EXTERN_C_END
1256 
1257 EXTERN_C_BEGIN
1258 #undef __FUNC__
1259 #define __FUNC__ "MatCreate_SeqSBAIJ"
1260 int MatCreate_SeqSBAIJ(Mat B)
1261 {
1262   Mat_SeqSBAIJ *b;
1263   int          i,ierr,size;
1264   PetscTruth   flg;
1265   int          s_nz;
1266 
1267   PetscFunctionBegin;
1268   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1269   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1270 
1271   B->data = (void*)(b = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(b);
1272   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1273   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1274   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1275   B->ops->view        = MatView_SeqSBAIJ;
1276   B->factor           = 0;
1277   B->lupivotthreshold = 1.0;
1278   B->mapping          = 0;
1279   b->row              = 0;
1280   b->icol             = 0;
1281   b->reallocs         = 0;
1282   b->saved_values     = 0;
1283 
1284   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1285   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1286 
1287   b->sorted           = PETSC_FALSE;
1288   b->roworiented      = PETSC_TRUE;
1289   b->nonew            = 0;
1290   b->diag             = 0;
1291   b->solve_work       = 0;
1292   b->mult_work        = 0;
1293   b->spptr            = 0;
1294   b->keepzeroedrows   = PETSC_FALSE;
1295 
1296   b->inew             = 0;
1297   b->jnew             = 0;
1298   b->anew             = 0;
1299   b->a2anew           = 0;
1300   b->permute          = PETSC_FALSE;
1301 
1302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1303                                      "MatStoreValues_SeqSBAIJ",
1304                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1306                                      "MatRetrieveValues_SeqSBAIJ",
1307                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1309                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1310                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 EXTERN_C_END
1314 
1315 #undef __FUNC__
1316 #define __FUNC__ "MatSeqSBAIJSetNonzeroStructure"
1317 /*@C
1318    MatSeqSBAIJSetNonzeroStructure - Creates a sparse symmetric matrix in block AIJ (block
1319    compressed row) format.  For good matrix assembly performance the
1320    user should preallocate the matrix storage by setting the parameter nz
1321    (or the array nnz).  By setting these parameters accurately, performance
1322    during matrix assembly can be increased by more than a factor of 50.
1323 
1324    Collective on Mat
1325 
1326    Input Parameters:
1327 +  A - the symmetric matrix
1328 .  bs - size of block
1329 .  nz - number of block nonzeros per block row (same for all rows)
1330 -  nnz - array containing the number of block nonzeros in the various block rows
1331          (possibly different for each block row) or PETSC_NULL
1332 
1333    Options Database Keys:
1334 .   -mat_no_unroll - uses code that does not unroll the loops in the
1335                      block calculations (much slower)
1336 .    -mat_block_size - size of the blocks to use
1337 
1338    Level: intermediate
1339 
1340    Notes:
1341    The block AIJ format is compatible with standard Fortran 77
1342    storage.  That is, the stored row and column indices can begin at
1343    either one (as in Fortran) or zero.  See the users' manual for details.
1344 
1345    Specify the preallocated storage with either nz or nnz (not both).
1346    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1347    allocation.  For additional details, see the users manual chapter on
1348    matrices.
1349 
1350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1351 @*/
1352 int MatSeqSBAIJSetNonzeroStructure(Mat B,int bs,int nz,int *nnz)
1353 {
1354   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1355   int          i,len,ierr,mbs,bs2,size;
1356   PetscTruth   flg;
1357   int          s_nz;
1358 
1359   PetscFunctionBegin;
1360   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1361   mbs  = B->m/bs;
1362   bs2  = bs*bs;
1363 
1364   if (mbs*bs != B->m) {
1365     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1366   }
1367 
1368   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz);
1369   if (nnz) {
1370     for (i=0; i<mbs; i++) {
1371       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1372     }
1373   }
1374 
1375   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1376   if (!flg) {
1377     switch (bs) {
1378     case 1:
1379       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1380       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1381       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1382       B->ops->mult            = MatMult_SeqSBAIJ_1;
1383       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1384       break;
1385     case 2:
1386       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1387       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1388       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1389       B->ops->mult            = MatMult_SeqSBAIJ_2;
1390       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1391       break;
1392     case 3:
1393       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1394       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1395       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1396       B->ops->mult            = MatMult_SeqSBAIJ_3;
1397       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1398       break;
1399     case 4:
1400       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1401       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1402       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1403       B->ops->mult            = MatMult_SeqSBAIJ_4;
1404       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1405       break;
1406     case 5:
1407       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1408       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1409       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1410       B->ops->mult            = MatMult_SeqSBAIJ_5;
1411       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1412       break;
1413     case 6:
1414       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1415       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1416       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1417       B->ops->mult            = MatMult_SeqSBAIJ_6;
1418       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1419       break;
1420     case 7:
1421       B->ops->mult            = MatMult_SeqSBAIJ_7;
1422       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1423       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1424       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1425       break;
1426     }
1427   }
1428 
1429   b->mbs     = mbs;
1430   b->nbs     = mbs;
1431   b->imax    = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax);
1432   if (!nnz) {
1433     if (nz == PETSC_DEFAULT) nz = 5;
1434     else if (nz <= 0)        nz = 1;
1435     for (i=0; i<mbs; i++) {
1436       b->imax[i] = nz;
1437     }
1438     nz = nz*mbs; /* total nz */
1439   } else {
1440     nz = 0;
1441     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1442   }
1443   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1444   s_nz = nz;
1445 
1446   /* allocate the matrix space */
1447   len   = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1448   b->a  = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a);
1449   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1450   b->j  = (int*)(b->a + s_nz*bs2);
1451   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1452   b->i  = b->j + s_nz;
1453   b->singlemalloc = PETSC_TRUE;
1454 
1455   /* pointer to beginning of each row */
1456   b->i[0] = 0;
1457   for (i=1; i<mbs+1; i++) {
1458     b->i[i] = b->i[i-1] + b->imax[i-1];
1459   }
1460 
1461   /* b->ilen will count nonzeros in each block row so far. */
1462   b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));
1463   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1464   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1465 
1466   b->bs               = bs;
1467   b->bs2              = bs2;
1468   b->s_nz             = 0;
1469   b->s_maxnz          = s_nz*bs2;
1470 
1471   b->inew             = 0;
1472   b->jnew             = 0;
1473   b->anew             = 0;
1474   b->a2anew           = 0;
1475   b->permute          = PETSC_FALSE;
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 
1480 #undef __FUNC__
1481 #define __FUNC__ "MatCreateSeqSBAIJ"
1482 /*@C
1483    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1484    compressed row) format.  For good matrix assembly performance the
1485    user should preallocate the matrix storage by setting the parameter nz
1486    (or the array nnz).  By setting these parameters accurately, performance
1487    during matrix assembly can be increased by more than a factor of 50.
1488 
1489    Collective on MPI_Comm
1490 
1491    Input Parameters:
1492 +  comm - MPI communicator, set to PETSC_COMM_SELF
1493 .  bs - size of block
1494 .  m - number of rows, or number of columns
1495 .  nz - number of block nonzeros per block row (same for all rows)
1496 -  nnz - array containing the number of block nonzeros in the various block rows
1497          (possibly different for each block row) or PETSC_NULL
1498 
1499    Output Parameter:
1500 .  A - the symmetric matrix
1501 
1502    Options Database Keys:
1503 .   -mat_no_unroll - uses code that does not unroll the loops in the
1504                      block calculations (much slower)
1505 .    -mat_block_size - size of the blocks to use
1506 
1507    Level: intermediate
1508 
1509    Notes:
1510    The block AIJ format is compatible with standard Fortran 77
1511    storage.  That is, the stored row and column indices can begin at
1512    either one (as in Fortran) or zero.  See the users' manual for details.
1513 
1514    Specify the preallocated storage with either nz or nnz (not both).
1515    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1516    allocation.  For additional details, see the users manual chapter on
1517    matrices.
1518 
1519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1520 @*/
1521 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1522 {
1523   int ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1527   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1528   ierr = MatSeqSBAIJSetNonzeroStructure(*A,bs,nz,nnz);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNC__
1533 #define __FUNC__ "MatDuplicate_SeqSBAIJ"
1534 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1535 {
1536   Mat         C;
1537   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1538   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1539 
1540   PetscFunctionBegin;
1541   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1542 
1543   *B = 0;
1544   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&C);CHKERRQ(ierr);
1545   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1546   C->data           = (void*)(c = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(c);
1547   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1548   C->ops->destroy   = MatDestroy_SeqSBAIJ;
1549   C->ops->view      = MatView_SeqSBAIJ;
1550   C->factor         = A->factor;
1551   c->row            = 0;
1552   c->icol           = 0;
1553   c->saved_values   = 0;
1554   c->keepzeroedrows = a->keepzeroedrows;
1555   C->assembled      = PETSC_TRUE;
1556 
1557   c->bs         = a->bs;
1558   c->bs2        = a->bs2;
1559   c->mbs        = a->mbs;
1560   c->nbs        = a->nbs;
1561 
1562   c->imax       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1563   c->ilen       = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1564   for (i=0; i<mbs; i++) {
1565     c->imax[i] = a->imax[i];
1566     c->ilen[i] = a->ilen[i];
1567   }
1568 
1569   /* allocate the matrix space */
1570   c->singlemalloc = PETSC_TRUE;
1571   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1572   c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a);
1573   c->j = (int*)(c->a + nz*bs2);
1574   c->i = c->j + nz;
1575   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1576   if (mbs > 0) {
1577     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1578     if (cpvalues == MAT_COPY_VALUES) {
1579       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1580     } else {
1581       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1582     }
1583   }
1584 
1585   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1586   c->sorted      = a->sorted;
1587   c->roworiented = a->roworiented;
1588   c->nonew       = a->nonew;
1589 
1590   if (a->diag) {
1591     c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag);
1592     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1593     for (i=0; i<mbs; i++) {
1594       c->diag[i] = a->diag[i];
1595     }
1596   } else c->diag        = 0;
1597   c->s_nz               = a->s_nz;
1598   c->s_maxnz            = a->s_maxnz;
1599   c->solve_work         = 0;
1600   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1601   c->mult_work          = 0;
1602   *B = C;
1603   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 EXTERN_C_BEGIN
1608 #undef __FUNC__
1609 #define __FUNC__ "MatLoad_SeqSBAIJ"
1610 int MatLoad_SeqSBAIJ(Viewer viewer,MatType type,Mat *A)
1611 {
1612   Mat_SeqSBAIJ  *a;
1613   Mat          B;
1614   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1615   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1616   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1617   int          *masked,nmask,tmp,bs2,ishift;
1618   Scalar       *aa;
1619   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1620 
1621   PetscFunctionBegin;
1622   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1623   bs2  = bs*bs;
1624 
1625   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1626   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1627   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1628   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1629   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1630   M = header[1]; N = header[2]; nz = header[3];
1631 
1632   if (header[3] < 0) {
1633     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1634   }
1635 
1636   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1637 
1638   /*
1639      This code adds extra rows to make sure the number of rows is
1640     divisible by the blocksize
1641   */
1642   mbs        = M/bs;
1643   extra_rows = bs - M + bs*(mbs);
1644   if (extra_rows == bs) extra_rows = 0;
1645   else                  mbs++;
1646   if (extra_rows) {
1647     PLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1648   }
1649 
1650   /* read in row lengths */
1651   rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1652   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1653   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1654 
1655   /* read in column indices */
1656   jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj);
1657   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1658   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1659 
1660   /* loop over row lengths determining block row lengths */
1661   browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1662   s_browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(s_browlengths);
1663   ierr        = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1664   mask        = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask);
1665   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1666   masked      = mask + mbs;
1667   rowcount    = 0; nzcount = 0;
1668   for (i=0; i<mbs; i++) {
1669     nmask = 0;
1670     for (j=0; j<bs; j++) {
1671       kmax = rowlengths[rowcount];
1672       for (k=0; k<kmax; k++) {
1673         tmp = jj[nzcount++]/bs;   /* block col. index */
1674         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1675       }
1676       rowcount++;
1677     }
1678     s_browlengths[i] += nmask;
1679     browlengths[i]    = 2*s_browlengths[i];
1680 
1681     /* zero out the mask elements we set */
1682     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1683   }
1684 
1685   /* create our matrix */
1686   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1687   B = *A;
1688   a = (Mat_SeqSBAIJ*)B->data;
1689 
1690   /* set matrix "i" values */
1691   a->i[0] = 0;
1692   for (i=1; i<= mbs; i++) {
1693     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1694     a->ilen[i-1] = s_browlengths[i-1];
1695   }
1696   a->s_nz         = 0;
1697   for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1698 
1699   /* read in nonzero values */
1700   aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1701   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1702   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1703 
1704   /* set "a" and "j" values into matrix */
1705   nzcount = 0; jcount = 0;
1706   for (i=0; i<mbs; i++) {
1707     nzcountb = nzcount;
1708     nmask    = 0;
1709     for (j=0; j<bs; j++) {
1710       kmax = rowlengths[i*bs+j];
1711       for (k=0; k<kmax; k++) {
1712         tmp = jj[nzcount++]/bs; /* block col. index */
1713         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1714       }
1715       rowcount++;
1716     }
1717     /* sort the masked values */
1718     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1719 
1720     /* set "j" values into matrix */
1721     maskcount = 1;
1722     for (j=0; j<nmask; j++) {
1723       a->j[jcount++]  = masked[j];
1724       mask[masked[j]] = maskcount++;
1725     }
1726 
1727     /* set "a" values into matrix */
1728     ishift = bs2*a->i[i];
1729     for (j=0; j<bs; j++) {
1730       kmax = rowlengths[i*bs+j];
1731       for (k=0; k<kmax; k++) {
1732         tmp       = jj[nzcountb]/bs ; /* block col. index */
1733         if (tmp >= i){
1734           block     = mask[tmp] - 1;
1735           point     = jj[nzcountb] - bs*tmp;
1736           idx       = ishift + bs2*block + j + bs*point;
1737           a->a[idx] = aa[nzcountb];
1738         }
1739         nzcountb++;
1740       }
1741     }
1742     /* zero out the mask elements we set */
1743     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1744   }
1745   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1746 
1747   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1748   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1749   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1750   ierr = PetscFree(aa);CHKERRQ(ierr);
1751   ierr = PetscFree(jj);CHKERRQ(ierr);
1752   ierr = PetscFree(mask);CHKERRQ(ierr);
1753 
1754   B->assembled = PETSC_TRUE;
1755   ierr = MatView_Private(B);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 EXTERN_C_END
1759 
1760 
1761 
1762