xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 737d121a2b57b182d532aa04ba3c4e4d7f0dd66e)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   PetscErrorCode ierr;
204   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
205   MatScalar    *aa,*aa_i;
206   PetscScalar  *v_i;
207 
208   PetscFunctionBegin;
209   bs  = a->bs;
210   ai  = a->i;
211   aj  = a->j;
212   aa  = a->a;
213   bs2 = a->bs2;
214 
215   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
216 
217   bn  = row/bs;   /* Block number */
218   bp  = row % bs; /* Block position */
219   M   = ai[bn+1] - ai[bn];
220   *ncols = bs*M;
221 
222   if (v) {
223     *v = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         v_i  = *v + i*bs;
228         aa_i = aa + bs2*(ai[bn] + i);
229         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
230       }
231     }
232   }
233 
234   if (cols) {
235     *cols = 0;
236     if (*ncols) {
237       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
238       for (i=0; i<M; i++) { /* for each block in the block row */
239         cols_i = *cols + i*bs;
240         itmp  = bs*aj[ai[bn] + i];
241         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
242       }
243     }
244   }
245 
246   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
247   /* this segment is currently removed, so only entries in the upper triangle are obtained */
248 #ifdef column_search
249   v_i    = *v    + M*bs;
250   cols_i = *cols + M*bs;
251   for (i=0; i<bn; i++){ /* for each block row */
252     M = ai[i+1] - ai[i];
253     for (j=0; j<M; j++){
254       itmp = aj[ai[i] + j];    /* block column value */
255       if (itmp == bn){
256         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
257         for (k=0; k<bs; k++) {
258           *cols_i++ = i*bs+k;
259           *v_i++    = aa_i[k];
260         }
261         *ncols += bs;
262         break;
263       }
264     }
265   }
266 #endif
267 
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
274 {
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
279   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
286 {
287   PetscErrorCode ierr;
288   PetscFunctionBegin;
289   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
296 {
297   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
298   PetscErrorCode ierr;
299   int i,j,bs = a->bs,k,l,bs2=a->bs2;
300   char              *name;
301   PetscViewerFormat format;
302 
303   PetscFunctionBegin;
304   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
305   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
306   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
307     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
308   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
309     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
310   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
311     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
312     for (i=0; i<a->mbs; i++) {
313       for (j=0; j<bs; j++) {
314         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
315         for (k=a->i[i]; k<a->i[i+1]; k++) {
316           for (l=0; l<bs; l++) {
317 #if defined(PETSC_USE_COMPLEX)
318             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
320                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
321             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
322               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
323                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
325               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
326             }
327 #else
328             if (a->a[bs2*k + l*bs + j] != 0.0) {
329               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
330             }
331 #endif
332           }
333         }
334         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
335       }
336     }
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
338   } else {
339     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
340     for (i=0; i<a->mbs; i++) {
341       for (j=0; j<bs; j++) {
342         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
343         for (k=a->i[i]; k<a->i[i+1]; k++) {
344           for (l=0; l<bs; l++) {
345 #if defined(PETSC_USE_COMPLEX)
346             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
347               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
348                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
350               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
351                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             } else {
353               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
354             }
355 #else
356             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
357 #endif
358           }
359         }
360         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
361       }
362     }
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
364   }
365   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
372 {
373   Mat          A = (Mat) Aa;
374   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
375   PetscErrorCode ierr;
376   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
377   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
378   MatScalar    *aa;
379   MPI_Comm     comm;
380   PetscViewer  viewer;
381 
382   PetscFunctionBegin;
383   /*
384       This is nasty. If this is called from an originally parallel matrix
385    then all processes call this,but only the first has the matrix so the
386    rest should return immediately.
387   */
388   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
389   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
390   if (rank) PetscFunctionReturn(0);
391 
392   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
393 
394   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
395   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
396 
397   /* loop over matrix elements drawing boxes */
398   color = PETSC_DRAW_BLUE;
399   for (i=0,row=0; i<mbs; i++,row+=bs) {
400     for (j=a->i[i]; j<a->i[i+1]; j++) {
401       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
402       x_l = a->j[j]*bs; x_r = x_l + 1.0;
403       aa = a->a + j*bs2;
404       for (k=0; k<bs; k++) {
405         for (l=0; l<bs; l++) {
406           if (PetscRealPart(*aa++) >=  0.) continue;
407           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
408         }
409       }
410     }
411   }
412   color = PETSC_DRAW_CYAN;
413   for (i=0,row=0; i<mbs; i++,row+=bs) {
414     for (j=a->i[i]; j<a->i[i+1]; j++) {
415       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
416       x_l = a->j[j]*bs; x_r = x_l + 1.0;
417       aa = a->a + j*bs2;
418       for (k=0; k<bs; k++) {
419         for (l=0; l<bs; l++) {
420           if (PetscRealPart(*aa++) != 0.) continue;
421           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
422         }
423       }
424     }
425   }
426 
427   color = PETSC_DRAW_RED;
428   for (i=0,row=0; i<mbs; i++,row+=bs) {
429     for (j=a->i[i]; j<a->i[i+1]; j++) {
430       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
431       x_l = a->j[j]*bs; x_r = x_l + 1.0;
432       aa = a->a + j*bs2;
433       for (k=0; k<bs; k++) {
434         for (l=0; l<bs; l++) {
435           if (PetscRealPart(*aa++) <= 0.) continue;
436           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
437         }
438       }
439     }
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
447 {
448   PetscErrorCode ierr;
449   PetscReal    xl,yl,xr,yr,w,h;
450   PetscDraw    draw;
451   PetscTruth   isnull;
452 
453   PetscFunctionBegin;
454 
455   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
456   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
457 
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
459   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
460   xr += w;    yr += h;  xl = -w;     yl = -h;
461   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
462   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
463   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatView_SeqSBAIJ"
469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
470 {
471   PetscErrorCode ierr;
472   PetscTruth iascii,isdraw;
473 
474   PetscFunctionBegin;
475   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
476   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
477   if (iascii){
478     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
479   } else if (isdraw) {
480     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
481   } else {
482     Mat B;
483     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
484     ierr = MatView(B,viewer);CHKERRQ(ierr);
485     ierr = MatDestroy(B);CHKERRQ(ierr);
486   }
487   PetscFunctionReturn(0);
488 }
489 
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
494 {
495   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
496   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
497   int        *ai = a->i,*ailen = a->ilen;
498   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
499   MatScalar  *ap,*aa = a->a,zero = 0.0;
500 
501   PetscFunctionBegin;
502   for (k=0; k<m; k++) { /* loop over rows */
503     row  = im[k]; brow = row/bs;
504     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
505     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
506     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
507     nrow = ailen[brow];
508     for (l=0; l<n; l++) { /* loop over columns */
509       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
510       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
511       col  = in[l] ;
512       bcol = col/bs;
513       cidx = col%bs;
514       ridx = row%bs;
515       high = nrow;
516       low  = 0; /* assume unsorted */
517       while (high-low > 5) {
518         t = (low+high)/2;
519         if (rp[t] > bcol) high = t;
520         else             low  = t;
521       }
522       for (i=low; i<high; i++) {
523         if (rp[i] > bcol) break;
524         if (rp[i] == bcol) {
525           *v++ = ap[bs2*i+bs*cidx+ridx];
526           goto finished;
527         }
528       }
529       *v++ = zero;
530       finished:;
531     }
532   }
533   PetscFunctionReturn(0);
534 }
535 
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
540 {
541   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
542   PetscErrorCode ierr;
543   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
544   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
545   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
546   PetscTruth      roworiented=a->roworiented;
547   const MatScalar *value = v;
548   MatScalar       *ap,*aa = a->a,*bap;
549 
550   PetscFunctionBegin;
551   if (roworiented) {
552     stepval = (n-1)*bs;
553   } else {
554     stepval = (m-1)*bs;
555   }
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     if (row < 0) continue;
559 #if defined(PETSC_USE_BOPT_g)
560     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
561 #endif
562     rp   = aj + ai[row];
563     ap   = aa + bs2*ai[row];
564     rmax = imax[row];
565     nrow = ailen[row];
566     low  = 0;
567     for (l=0; l<n; l++) { /* loop over added columns */
568       if (in[l] < 0) continue;
569       col = in[l];
570 #if defined(PETSC_USE_BOPT_g)
571       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
572 #endif
573       if (col < row) continue; /* ignore lower triangular block */
574       if (roworiented) {
575         value = v + k*(stepval+bs)*bs + l*bs;
576       } else {
577         value = v + l*(stepval+bs)*bs + k*bs;
578       }
579       if (!sorted) low = 0; high = nrow;
580       while (high-low > 7) {
581         t = (low+high)/2;
582         if (rp[t] > col) high = t;
583         else             low  = t;
584       }
585       for (i=low; i<high; i++) {
586         if (rp[i] > col) break;
587         if (rp[i] == col) {
588           bap  = ap +  bs2*i;
589           if (roworiented) {
590             if (is == ADD_VALUES) {
591               for (ii=0; ii<bs; ii++,value+=stepval) {
592                 for (jj=ii; jj<bs2; jj+=bs) {
593                   bap[jj] += *value++;
594                 }
595               }
596             } else {
597               for (ii=0; ii<bs; ii++,value+=stepval) {
598                 for (jj=ii; jj<bs2; jj+=bs) {
599                   bap[jj] = *value++;
600                 }
601               }
602             }
603           } else {
604             if (is == ADD_VALUES) {
605               for (ii=0; ii<bs; ii++,value+=stepval) {
606                 for (jj=0; jj<bs; jj++) {
607                   *bap++ += *value++;
608                 }
609               }
610             } else {
611               for (ii=0; ii<bs; ii++,value+=stepval) {
612                 for (jj=0; jj<bs; jj++) {
613                   *bap++  = *value++;
614                 }
615               }
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       if (nonew == 1) goto noinsert2;
622       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
623       if (nrow >= rmax) {
624         /* there is no extra room in row, therefore enlarge */
625         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
626         MatScalar *new_a;
627 
628         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
629 
630         /* malloc new storage space */
631         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
632 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
633         new_j   = (int*)(new_a + bs2*new_nz);
634         new_i   = new_j + new_nz;
635 
636         /* copy over old data into new slots */
637         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
638         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
639         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
640         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
641         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
642         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
643         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
644         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
645         /* free up old matrix storage */
646         ierr = PetscFree(a->a);CHKERRQ(ierr);
647         if (!a->singlemalloc) {
648           ierr = PetscFree(a->i);CHKERRQ(ierr);
649           ierr = PetscFree(a->j);CHKERRQ(ierr);
650         }
651         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
652         a->singlemalloc = PETSC_TRUE;
653 
654         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
655         rmax = imax[row] = imax[row] + CHUNKSIZE;
656         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
657         a->maxnz += bs2*CHUNKSIZE;
658         a->reallocs++;
659         a->nz++;
660       }
661       N = nrow++ - 1;
662       /* shift up all the later entries in this row */
663       for (ii=N; ii>=i; ii--) {
664         rp[ii+1] = rp[ii];
665         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
666       }
667       if (N >= i) {
668         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
669       }
670       rp[i] = col;
671       bap   = ap +  bs2*i;
672       if (roworiented) {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=ii; jj<bs2; jj+=bs) {
675             bap[jj] = *value++;
676           }
677         }
678       } else {
679         for (ii=0; ii<bs; ii++,value+=stepval) {
680           for (jj=0; jj<bs; jj++) {
681             *bap++  = *value++;
682           }
683         }
684       }
685       noinsert2:;
686       low = i;
687     }
688     ailen[row] = nrow;
689   }
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
696 {
697   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
698   PetscErrorCode ierr;
699   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
700   int        m = A->m,*ip,N,*ailen = a->ilen;
701   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
702   MatScalar  *aa = a->a,*ap;
703 
704   PetscFunctionBegin;
705   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
706 
707   if (m) rmax = ailen[0];
708   for (i=1; i<mbs; i++) {
709     /* move each row back by the amount of empty slots (fshift) before it*/
710     fshift += imax[i-1] - ailen[i-1];
711     rmax   = PetscMax(rmax,ailen[i]);
712     if (fshift) {
713       ip = aj + ai[i]; ap = aa + bs2*ai[i];
714       N = ailen[i];
715       for (j=0; j<N; j++) {
716         ip[j-fshift] = ip[j];
717         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719     }
720     ai[i] = ai[i-1] + ailen[i-1];
721   }
722   if (mbs) {
723     fshift += imax[mbs-1] - ailen[mbs-1];
724     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
725   }
726   /* reset ilen and imax for each row */
727   for (i=0; i<mbs; i++) {
728     ailen[i] = imax[i] = ai[i+1] - ai[i];
729   }
730   a->nz = ai[mbs];
731 
732   /* diagonals may have moved, reset it */
733   if (a->diag) {
734     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
735   }
736   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
737            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
738   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
739            a->reallocs);
740   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
741   a->reallocs          = 0;
742   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
743 
744   PetscFunctionReturn(0);
745 }
746 
747 /*
748    This function returns an array of flags which indicate the locations of contiguous
749    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
750    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
751    Assume: sizes should be long enough to hold all the values.
752 */
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
756 {
757   int        i,j,k,row;
758   PetscTruth flg;
759 
760   PetscFunctionBegin;
761   for (i=0,j=0; i<n; j++) {
762     row = idx[i];
763     if (row%bs!=0) { /* Not the begining of a block */
764       sizes[j] = 1;
765       i++;
766     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
767       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
768       i++;
769     } else { /* Begining of the block, so check if the complete block exists */
770       flg = PETSC_TRUE;
771       for (k=1; k<bs; k++) {
772         if (row+k != idx[i+k]) { /* break in the block */
773           flg = PETSC_FALSE;
774           break;
775         }
776       }
777       if (flg == PETSC_TRUE) { /* No break in the bs */
778         sizes[j] = bs;
779         i+= bs;
780       } else {
781         sizes[j] = 1;
782         i++;
783       }
784     }
785   }
786   *bs_max = j;
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
793 {
794   PetscFunctionBegin;
795   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
796 }
797 
798 /* Only add/insert a(i,j) with i<=j (blocks).
799    Any a(i,j) with i>j input by user is ingored.
800 */
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
805 {
806   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
807   PetscErrorCode ierr;
808   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
809   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
810   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
811   int         ridx,cidx,bs2=a->bs2;
812   MatScalar   *ap,value,*aa=a->a,*bap;
813 
814   PetscFunctionBegin;
815 
816   for (k=0; k<m; k++) { /* loop over added rows */
817     row  = im[k];       /* row number */
818     brow = row/bs;      /* block row number */
819     if (row < 0) continue;
820 #if defined(PETSC_USE_BOPT_g)
821     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
822 #endif
823     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
824     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
825     rmax = imax[brow];  /* maximum space allocated for this row */
826     nrow = ailen[brow]; /* actual length of this row */
827     low  = 0;
828 
829     for (l=0; l<n; l++) { /* loop over added columns */
830       if (in[l] < 0) continue;
831 #if defined(PETSC_USE_BOPT_g)
832       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
833 #endif
834       col = in[l];
835       bcol = col/bs;              /* block col number */
836 
837       if (brow <= bcol){
838         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
839         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
840           /* element value a(k,l) */
841           if (roworiented) {
842             value = v[l + k*n];
843           } else {
844             value = v[k + l*m];
845           }
846 
847           /* move pointer bap to a(k,l) quickly and add/insert value */
848           if (!sorted) low = 0; high = nrow;
849           while (high-low > 7) {
850             t = (low+high)/2;
851             if (rp[t] > bcol) high = t;
852             else              low  = t;
853           }
854           for (i=low; i<high; i++) {
855             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
856             if (rp[i] > bcol) break;
857             if (rp[i] == bcol) {
858               bap  = ap +  bs2*i + bs*cidx + ridx;
859               if (is == ADD_VALUES) *bap += value;
860               else                  *bap  = value;
861               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
862               if (brow == bcol && ridx < cidx){
863                 bap  = ap +  bs2*i + bs*ridx + cidx;
864                 if (is == ADD_VALUES) *bap += value;
865                 else                  *bap  = value;
866               }
867               goto noinsert1;
868             }
869           }
870 
871       if (nonew == 1) goto noinsert1;
872       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873       if (nrow >= rmax) {
874         /* there is no extra room in row, therefore enlarge */
875         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
876         MatScalar *new_a;
877 
878         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
879 
880         /* Malloc new storage space */
881         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
882         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
883         new_j = (int*)(new_a + bs2*new_nz);
884         new_i = new_j + new_nz;
885 
886         /* copy over old data into new slots */
887         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
888         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
889         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
890         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
891         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
892         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
893         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
894         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
895         /* free up old matrix storage */
896         ierr = PetscFree(a->a);CHKERRQ(ierr);
897         if (!a->singlemalloc) {
898           ierr = PetscFree(a->i);CHKERRQ(ierr);
899           ierr = PetscFree(a->j);CHKERRQ(ierr);
900         }
901         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
902         a->singlemalloc = PETSC_TRUE;
903 
904         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
905         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
906         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
907         a->maxnz += bs2*CHUNKSIZE;
908         a->reallocs++;
909         a->nz++;
910       }
911 
912       N = nrow++ - 1;
913       /* shift up all the later entries in this row */
914       for (ii=N; ii>=i; ii--) {
915         rp[ii+1] = rp[ii];
916         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
917       }
918       if (N>=i) {
919         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
920       }
921       rp[i]                      = bcol;
922       ap[bs2*i + bs*cidx + ridx] = value;
923       noinsert1:;
924       low = i;
925       /* } */
926         }
927       } /* end of if .. if..  */
928     }                     /* end of loop over added columns */
929     ailen[brow] = nrow;
930   }                       /* end of loop over added rows */
931 
932   PetscFunctionReturn(0);
933 }
934 
935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
950 
951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
966 
967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
968 
969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
977 
978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
987 
988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
996 
997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1005 
1006 #ifdef HAVE_MatICCFactor
1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1011 {
1012   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1013   Mat         outA;
1014   PetscErrorCode ierr;
1015   PetscTruth  row_identity,col_identity;
1016 
1017   PetscFunctionBegin;
1018   /*
1019   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1020   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1021   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1022   if (!row_identity || !col_identity) {
1023     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1024   }
1025   */
1026 
1027   outA          = inA;
1028   inA->factor   = FACTOR_CHOLESKY;
1029 
1030   if (!a->diag) {
1031     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1032   }
1033   /*
1034       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1035       for ILU(0) factorization with natural ordering
1036   */
1037   switch (a->bs) {
1038   case 1:
1039     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1040     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1041     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1042     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1043   case 2:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1048     break;
1049   case 3:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1054     break;
1055   case 4:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1060     break;
1061   case 5:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1066     break;
1067   case 6:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1069     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1070     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1072     break;
1073   case 7:
1074     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1075     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1076     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1077     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1078     break;
1079   default:
1080     a->row        = row;
1081     a->icol       = col;
1082     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1083     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1084 
1085     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1086     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1087     PetscLogObjectParent(inA,a->icol);
1088 
1089     if (!a->solve_work) {
1090       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1091       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1092     }
1093   }
1094 
1095   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 #endif
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1104 {
1105   static PetscTruth called = PETSC_FALSE;
1106   MPI_Comm          comm = A->comm;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1111   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1112   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 EXTERN_C_BEGIN
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1120 {
1121   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1122   int         i,nz,n;
1123 
1124   PetscFunctionBegin;
1125   nz = baij->maxnz;
1126   n  = mat->n;
1127   for (i=0; i<nz; i++) {
1128     baij->j[i] = indices[i];
1129   }
1130   baij->nz = nz;
1131   for (i=0; i<n; i++) {
1132     baij->ilen[i] = baij->imax[i];
1133   }
1134 
1135   PetscFunctionReturn(0);
1136 }
1137 EXTERN_C_END
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1141 /*@
1142     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1143        in the matrix.
1144 
1145   Input Parameters:
1146 +  mat     - the SeqSBAIJ matrix
1147 -  indices - the column indices
1148 
1149   Level: advanced
1150 
1151   Notes:
1152     This can be called if you have precomputed the nonzero structure of the
1153   matrix and want to provide it to the matrix object to improve the performance
1154   of the MatSetValues() operation.
1155 
1156     You MUST have set the correct numbers of nonzeros per row in the call to
1157   MatCreateSeqSBAIJ().
1158 
1159     MUST be called before any calls to MatSetValues()
1160 
1161 .seealso: MatCreateSeqSBAIJ
1162 @*/
1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1164 {
1165   PetscErrorCode ierr,(*f)(Mat,int *);
1166 
1167   PetscFunctionBegin;
1168   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1169   PetscValidPointer(indices,2);
1170   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1171   if (f) {
1172     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1173   } else {
1174     SETERRQ(1,"Wrong type of matrix to set column indices");
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1182 {
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1193 {
1194   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1195   PetscFunctionBegin;
1196   *array = a->a;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1203 {
1204   PetscFunctionBegin;
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #include "petscblaslapack.h"
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1212 {
1213   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1214   PetscErrorCode ierr;
1215   int            i,bs=y->bs,bs2,j;
1216   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1217 
1218   PetscFunctionBegin;
1219   if (str == SAME_NONZERO_PATTERN) {
1220     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1221   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1222     if (y->xtoy && y->XtoY != X) {
1223       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1224       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1225     }
1226     if (!y->xtoy) { /* get xtoy */
1227       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1228       y->XtoY = X;
1229     }
1230     bs2 = bs*bs;
1231     for (i=0; i<x->nz; i++) {
1232       j = 0;
1233       while (j < bs2){
1234         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1235         j++;
1236       }
1237     }
1238     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1239   } else {
1240     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1247 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1248 {
1249   PetscFunctionBegin;
1250   *flg = PETSC_TRUE;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1256 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1257 {
1258   PetscFunctionBegin;
1259   *flg = PETSC_TRUE;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1265 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1266 {
1267   PetscFunctionBegin;
1268   *flg = PETSC_FALSE;
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 /* -------------------------------------------------------------------*/
1273 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1274        MatGetRow_SeqSBAIJ,
1275        MatRestoreRow_SeqSBAIJ,
1276        MatMult_SeqSBAIJ_N,
1277 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1278        MatMultTranspose_SeqSBAIJ,
1279        MatMultTransposeAdd_SeqSBAIJ,
1280        MatSolve_SeqSBAIJ_N,
1281        0,
1282        0,
1283 /*10*/ 0,
1284        0,
1285        MatCholeskyFactor_SeqSBAIJ,
1286        MatRelax_SeqSBAIJ,
1287        MatTranspose_SeqSBAIJ,
1288 /*15*/ MatGetInfo_SeqSBAIJ,
1289        MatEqual_SeqSBAIJ,
1290        MatGetDiagonal_SeqSBAIJ,
1291        MatDiagonalScale_SeqSBAIJ,
1292        MatNorm_SeqSBAIJ,
1293 /*20*/ 0,
1294        MatAssemblyEnd_SeqSBAIJ,
1295        0,
1296        MatSetOption_SeqSBAIJ,
1297        MatZeroEntries_SeqSBAIJ,
1298 /*25*/ MatZeroRows_SeqSBAIJ,
1299        0,
1300        0,
1301        MatCholeskyFactorSymbolic_SeqSBAIJ,
1302        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1303 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1304        0,
1305        MatICCFactorSymbolic_SeqSBAIJ,
1306        MatGetArray_SeqSBAIJ,
1307        MatRestoreArray_SeqSBAIJ,
1308 /*35*/ MatDuplicate_SeqSBAIJ,
1309        0,
1310        0,
1311        0,
1312        0,
1313 /*40*/ MatAXPY_SeqSBAIJ,
1314        MatGetSubMatrices_SeqSBAIJ,
1315        MatIncreaseOverlap_SeqSBAIJ,
1316        MatGetValues_SeqSBAIJ,
1317        0,
1318 /*45*/ MatPrintHelp_SeqSBAIJ,
1319        MatScale_SeqSBAIJ,
1320        0,
1321        0,
1322        0,
1323 /*50*/ MatGetBlockSize_SeqSBAIJ,
1324        MatGetRowIJ_SeqSBAIJ,
1325        MatRestoreRowIJ_SeqSBAIJ,
1326        0,
1327        0,
1328 /*55*/ 0,
1329        0,
1330        0,
1331        0,
1332        MatSetValuesBlocked_SeqSBAIJ,
1333 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1334        0,
1335        0,
1336        MatGetPetscMaps_Petsc,
1337        0,
1338 /*65*/ 0,
1339        0,
1340        0,
1341        0,
1342        0,
1343 /*70*/ MatGetRowMax_SeqSBAIJ,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*75*/ 0,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*80*/ 0,
1354        0,
1355        0,
1356 #if !defined(PETSC_USE_COMPLEX)
1357        MatGetInertia_SeqSBAIJ,
1358 #else
1359        0,
1360 #endif
1361        MatLoad_SeqSBAIJ,
1362 /*85*/ MatIsSymmetric_SeqSBAIJ,
1363        MatIsHermitian_SeqSBAIJ,
1364        MatIsStructurallySymmetric_SeqSBAIJ,
1365        0,
1366        0,
1367 /*90*/ 0,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*95*/ 0,
1373        0,
1374        0,
1375        0};
1376 
1377 EXTERN_C_BEGIN
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1380 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1381 {
1382   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1383   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1384   PetscErrorCode ierr;
1385 
1386   PetscFunctionBegin;
1387   if (aij->nonew != 1) {
1388     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1389   }
1390 
1391   /* allocate space for values if not already there */
1392   if (!aij->saved_values) {
1393     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1394   }
1395 
1396   /* copy values over */
1397   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 EXTERN_C_END
1401 
1402 EXTERN_C_BEGIN
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1405 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1406 {
1407   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1408   PetscErrorCode ierr;
1409   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1410 
1411   PetscFunctionBegin;
1412   if (aij->nonew != 1) {
1413     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1414   }
1415   if (!aij->saved_values) {
1416     SETERRQ(1,"Must call MatStoreValues(A);first");
1417   }
1418 
1419   /* copy values over */
1420   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 EXTERN_C_END
1424 
1425 EXTERN_C_BEGIN
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1428 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1429 {
1430   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1431   PetscErrorCode ierr;
1432   int          i,len,mbs,bs2;
1433   PetscTruth   flg;
1434 
1435   PetscFunctionBegin;
1436   B->preallocated = PETSC_TRUE;
1437   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1438   mbs  = B->m/bs;
1439   bs2  = bs*bs;
1440 
1441   if (mbs*bs != B->m) {
1442     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1443   }
1444 
1445   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1446   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1447   if (nnz) {
1448     for (i=0; i<mbs; i++) {
1449       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1450       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1451     }
1452   }
1453 
1454   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1455   if (!flg) {
1456     switch (bs) {
1457     case 1:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1459       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1460       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1461       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1462       B->ops->mult            = MatMult_SeqSBAIJ_1;
1463       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1464       break;
1465     case 2:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1468       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1469       B->ops->mult            = MatMult_SeqSBAIJ_2;
1470       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1471       break;
1472     case 3:
1473       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1474       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1475       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1476       B->ops->mult            = MatMult_SeqSBAIJ_3;
1477       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1478       break;
1479     case 4:
1480       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1481       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1482       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1483       B->ops->mult            = MatMult_SeqSBAIJ_4;
1484       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1485       break;
1486     case 5:
1487       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1488       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1489       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1490       B->ops->mult            = MatMult_SeqSBAIJ_5;
1491       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1492       break;
1493     case 6:
1494       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1495       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1496       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1497       B->ops->mult            = MatMult_SeqSBAIJ_6;
1498       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1499       break;
1500     case 7:
1501       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1502       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1503       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1504       B->ops->mult            = MatMult_SeqSBAIJ_7;
1505       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1506       break;
1507     }
1508   }
1509 
1510   b->mbs = mbs;
1511   b->nbs = mbs;
1512   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1513   if (!nnz) {
1514     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1515     else if (nz <= 0)        nz = 1;
1516     for (i=0; i<mbs; i++) {
1517       b->imax[i] = nz;
1518     }
1519     nz = nz*mbs; /* total nz */
1520   } else {
1521     nz = 0;
1522     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1523   }
1524   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1525 
1526   /* allocate the matrix space */
1527   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1528   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1529   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1530   b->j = (int*)(b->a + nz*bs2);
1531   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1532   b->i = b->j + nz;
1533   b->singlemalloc = PETSC_TRUE;
1534 
1535   /* pointer to beginning of each row */
1536   b->i[0] = 0;
1537   for (i=1; i<mbs+1; i++) {
1538     b->i[i] = b->i[i-1] + b->imax[i-1];
1539   }
1540 
1541   /* b->ilen will count nonzeros in each block row so far. */
1542   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1543   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1544   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1545 
1546   b->bs               = bs;
1547   b->bs2              = bs2;
1548   b->nz             = 0;
1549   b->maxnz          = nz*bs2;
1550 
1551   b->inew             = 0;
1552   b->jnew             = 0;
1553   b->anew             = 0;
1554   b->a2anew           = 0;
1555   b->permute          = PETSC_FALSE;
1556   PetscFunctionReturn(0);
1557 }
1558 EXTERN_C_END
1559 
1560 EXTERN_C_BEGIN
1561 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1562 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1563 EXTERN_C_END
1564 
1565 /*MC
1566    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1567    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1568 
1569    Options Database Keys:
1570 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1571 
1572   Level: beginner
1573 
1574 .seealso: MatCreateSeqSBAIJ
1575 M*/
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1580 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1581 {
1582   Mat_SeqSBAIJ *b;
1583   PetscErrorCode ierr;
1584   int size;
1585 
1586   PetscFunctionBegin;
1587   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1588   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1589   B->m = B->M = PetscMax(B->m,B->M);
1590   B->n = B->N = PetscMax(B->n,B->N);
1591 
1592   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1593   B->data = (void*)b;
1594   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1595   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1596   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1597   B->ops->view        = MatView_SeqSBAIJ;
1598   B->factor           = 0;
1599   B->lupivotthreshold = 1.0;
1600   B->mapping          = 0;
1601   b->row              = 0;
1602   b->icol             = 0;
1603   b->reallocs         = 0;
1604   b->saved_values     = 0;
1605 
1606   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1607   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1608 
1609   b->sorted           = PETSC_FALSE;
1610   b->roworiented      = PETSC_TRUE;
1611   b->nonew            = 0;
1612   b->diag             = 0;
1613   b->solve_work       = 0;
1614   b->mult_work        = 0;
1615   B->spptr            = 0;
1616   b->keepzeroedrows   = PETSC_FALSE;
1617   b->xtoy             = 0;
1618   b->XtoY             = 0;
1619 
1620   b->inew             = 0;
1621   b->jnew             = 0;
1622   b->anew             = 0;
1623   b->a2anew           = 0;
1624   b->permute          = PETSC_FALSE;
1625 
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1627                                      "MatStoreValues_SeqSBAIJ",
1628                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1630                                      "MatRetrieveValues_SeqSBAIJ",
1631                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1633                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1634                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1636                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1637                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1639                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1640                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1642                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1643                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1644 
1645   B->symmetric                  = PETSC_TRUE;
1646   B->structurally_symmetric     = PETSC_TRUE;
1647   B->symmetric_set              = PETSC_TRUE;
1648   B->structurally_symmetric_set = PETSC_TRUE;
1649   PetscFunctionReturn(0);
1650 }
1651 EXTERN_C_END
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1655 /*@C
1656    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1657    compressed row) format.  For good matrix assembly performance the
1658    user should preallocate the matrix storage by setting the parameter nz
1659    (or the array nnz).  By setting these parameters accurately, performance
1660    during matrix assembly can be increased by more than a factor of 50.
1661 
1662    Collective on Mat
1663 
1664    Input Parameters:
1665 +  A - the symmetric matrix
1666 .  bs - size of block
1667 .  nz - number of block nonzeros per block row (same for all rows)
1668 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1669          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1670 
1671    Options Database Keys:
1672 .   -mat_no_unroll - uses code that does not unroll the loops in the
1673                      block calculations (much slower)
1674 .    -mat_block_size - size of the blocks to use
1675 
1676    Level: intermediate
1677 
1678    Notes:
1679    Specify the preallocated storage with either nz or nnz (not both).
1680    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1681    allocation.  For additional details, see the users manual chapter on
1682    matrices.
1683 
1684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1685 @*/
1686 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1687   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1688 
1689   PetscFunctionBegin;
1690   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1691   if (f) {
1692     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatCreateSeqSBAIJ"
1699 /*@C
1700    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1701    compressed row) format.  For good matrix assembly performance the
1702    user should preallocate the matrix storage by setting the parameter nz
1703    (or the array nnz).  By setting these parameters accurately, performance
1704    during matrix assembly can be increased by more than a factor of 50.
1705 
1706    Collective on MPI_Comm
1707 
1708    Input Parameters:
1709 +  comm - MPI communicator, set to PETSC_COMM_SELF
1710 .  bs - size of block
1711 .  m - number of rows, or number of columns
1712 .  nz - number of block nonzeros per block row (same for all rows)
1713 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1714          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1715 
1716    Output Parameter:
1717 .  A - the symmetric matrix
1718 
1719    Options Database Keys:
1720 .   -mat_no_unroll - uses code that does not unroll the loops in the
1721                      block calculations (much slower)
1722 .    -mat_block_size - size of the blocks to use
1723 
1724    Level: intermediate
1725 
1726    Notes:
1727 
1728    Specify the preallocated storage with either nz or nnz (not both).
1729    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1730    allocation.  For additional details, see the users manual chapter on
1731    matrices.
1732 
1733 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1734 @*/
1735 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1736 {
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1741   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1742   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1748 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1749 {
1750   Mat          C;
1751   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1752   PetscErrorCode ierr;
1753   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1754 
1755   PetscFunctionBegin;
1756   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1757 
1758   *B = 0;
1759   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1760   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1761   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1762   c    = (Mat_SeqSBAIJ*)C->data;
1763 
1764   C->preallocated   = PETSC_TRUE;
1765   C->factor         = A->factor;
1766   c->row            = 0;
1767   c->icol           = 0;
1768   c->saved_values   = 0;
1769   c->keepzeroedrows = a->keepzeroedrows;
1770   C->assembled      = PETSC_TRUE;
1771 
1772   C->M    = A->M;
1773   C->N    = A->N;
1774   c->bs   = a->bs;
1775   c->bs2  = a->bs2;
1776   c->mbs  = a->mbs;
1777   c->nbs  = a->nbs;
1778 
1779   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1780   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1781   for (i=0; i<mbs; i++) {
1782     c->imax[i] = a->imax[i];
1783     c->ilen[i] = a->ilen[i];
1784   }
1785 
1786   /* allocate the matrix space */
1787   c->singlemalloc = PETSC_TRUE;
1788   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1789   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1790   c->j = (int*)(c->a + nz*bs2);
1791   c->i = c->j + nz;
1792   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1793   if (mbs > 0) {
1794     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1795     if (cpvalues == MAT_COPY_VALUES) {
1796       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1797     } else {
1798       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1799     }
1800   }
1801 
1802   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1803   c->sorted      = a->sorted;
1804   c->roworiented = a->roworiented;
1805   c->nonew       = a->nonew;
1806 
1807   if (a->diag) {
1808     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1809     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1810     for (i=0; i<mbs; i++) {
1811       c->diag[i] = a->diag[i];
1812     }
1813   } else c->diag        = 0;
1814   c->nz               = a->nz;
1815   c->maxnz            = a->maxnz;
1816   c->solve_work         = 0;
1817   c->mult_work          = 0;
1818   *B = C;
1819   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1825 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1826 {
1827   Mat_SeqSBAIJ *a;
1828   Mat          B;
1829   PetscErrorCode ierr;
1830   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1831   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1832   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1833   int          *masked,nmask,tmp,bs2,ishift;
1834   PetscScalar  *aa;
1835   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1836 
1837   PetscFunctionBegin;
1838   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1839   bs2  = bs*bs;
1840 
1841   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1842   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1843   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1844   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1845   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1846   M = header[1]; N = header[2]; nz = header[3];
1847 
1848   if (header[3] < 0) {
1849     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1850   }
1851 
1852   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1853 
1854   /*
1855      This code adds extra rows to make sure the number of rows is
1856     divisible by the blocksize
1857   */
1858   mbs        = M/bs;
1859   extra_rows = bs - M + bs*(mbs);
1860   if (extra_rows == bs) extra_rows = 0;
1861   else                  mbs++;
1862   if (extra_rows) {
1863     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1864   }
1865 
1866   /* read in row lengths */
1867   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1868   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1869   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1870 
1871   /* read in column indices */
1872   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1873   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1874   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1875 
1876   /* loop over row lengths determining block row lengths */
1877   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1878   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1879   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1880   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1881   masked   = mask + mbs;
1882   rowcount = 0; nzcount = 0;
1883   for (i=0; i<mbs; i++) {
1884     nmask = 0;
1885     for (j=0; j<bs; j++) {
1886       kmax = rowlengths[rowcount];
1887       for (k=0; k<kmax; k++) {
1888         tmp = jj[nzcount++]/bs;   /* block col. index */
1889         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1890       }
1891       rowcount++;
1892     }
1893     s_browlengths[i] += nmask;
1894 
1895     /* zero out the mask elements we set */
1896     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1897   }
1898 
1899   /* create our matrix */
1900   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1901   ierr = MatSetType(B,type);CHKERRQ(ierr);
1902   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1903   a = (Mat_SeqSBAIJ*)B->data;
1904 
1905   /* set matrix "i" values */
1906   a->i[0] = 0;
1907   for (i=1; i<= mbs; i++) {
1908     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1909     a->ilen[i-1] = s_browlengths[i-1];
1910   }
1911   a->nz = a->i[mbs];
1912 
1913   /* read in nonzero values */
1914   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1915   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1916   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1917 
1918   /* set "a" and "j" values into matrix */
1919   nzcount = 0; jcount = 0;
1920   for (i=0; i<mbs; i++) {
1921     nzcountb = nzcount;
1922     nmask    = 0;
1923     for (j=0; j<bs; j++) {
1924       kmax = rowlengths[i*bs+j];
1925       for (k=0; k<kmax; k++) {
1926         tmp = jj[nzcount++]/bs; /* block col. index */
1927         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1928       }
1929     }
1930     /* sort the masked values */
1931     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1932 
1933     /* set "j" values into matrix */
1934     maskcount = 1;
1935     for (j=0; j<nmask; j++) {
1936       a->j[jcount++]  = masked[j];
1937       mask[masked[j]] = maskcount++;
1938     }
1939 
1940     /* set "a" values into matrix */
1941     ishift = bs2*a->i[i];
1942     for (j=0; j<bs; j++) {
1943       kmax = rowlengths[i*bs+j];
1944       for (k=0; k<kmax; k++) {
1945         tmp       = jj[nzcountb]/bs ; /* block col. index */
1946         if (tmp >= i){
1947           block     = mask[tmp] - 1;
1948           point     = jj[nzcountb] - bs*tmp;
1949           idx       = ishift + bs2*block + j + bs*point;
1950           a->a[idx] = aa[nzcountb];
1951         }
1952         nzcountb++;
1953       }
1954     }
1955     /* zero out the mask elements we set */
1956     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1957   }
1958   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1959 
1960   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1961   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1962   ierr = PetscFree(aa);CHKERRQ(ierr);
1963   ierr = PetscFree(jj);CHKERRQ(ierr);
1964   ierr = PetscFree(mask);CHKERRQ(ierr);
1965 
1966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   ierr = MatView_Private(B);CHKERRQ(ierr);
1969   *A = B;
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1975 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1976 {
1977   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1978   MatScalar    *aa=a->a,*v,*v1;
1979   PetscScalar  *x,*b,*t,sum,d;
1980   PetscErrorCode ierr;
1981   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j;
1982   int          nz,nz1,*vj,*vj1,i;
1983 
1984   PetscFunctionBegin;
1985   its = its*lits;
1986   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1987 
1988   if (bs > 1)
1989     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1990 
1991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1992   if (xx != bb) {
1993     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1994   } else {
1995     b = x;
1996   }
1997 
1998   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1999 
2000   if (flag & SOR_ZERO_INITIAL_GUESS) {
2001     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2002       for (i=0; i<m; i++)
2003         t[i] = b[i];
2004 
2005       for (i=0; i<m; i++){
2006         d  = *(aa + ai[i]);  /* diag[i] */
2007         v  = aa + ai[i] + 1;
2008         vj = aj + ai[i] + 1;
2009         nz = ai[i+1] - ai[i] - 1;
2010         x[i] = omega*t[i]/d;
2011         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2012         PetscLogFlops(2*nz-1);
2013       }
2014     }
2015 
2016     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2017       for (i=0; i<m; i++)
2018         t[i] = b[i];
2019 
2020       for (i=0; i<m-1; i++){  /* update rhs */
2021         v  = aa + ai[i] + 1;
2022         vj = aj + ai[i] + 1;
2023         nz = ai[i+1] - ai[i] - 1;
2024         while (nz--) t[*vj++] -= x[i]*(*v++);
2025         PetscLogFlops(2*nz-1);
2026       }
2027       for (i=m-1; i>=0; i--){
2028         d  = *(aa + ai[i]);
2029         v  = aa + ai[i] + 1;
2030         vj = aj + ai[i] + 1;
2031         nz = ai[i+1] - ai[i] - 1;
2032         sum = t[i];
2033         while (nz--) sum -= x[*vj++]*(*v++);
2034         PetscLogFlops(2*nz-1);
2035         x[i] =   (1-omega)*x[i] + omega*sum/d;
2036       }
2037     }
2038     its--;
2039   }
2040 
2041   while (its--) {
2042     /*
2043        forward sweep:
2044        for i=0,...,m-1:
2045          sum[i] = (b[i] - U(i,:)x )/d[i];
2046          x[i]   = (1-omega)x[i] + omega*sum[i];
2047          b      = b - x[i]*U^T(i,:);
2048 
2049     */
2050     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2051       for (i=0; i<m; i++)
2052         t[i] = b[i];
2053 
2054       for (i=0; i<m; i++){
2055         d  = *(aa + ai[i]);  /* diag[i] */
2056         v  = aa + ai[i] + 1; v1=v;
2057         vj = aj + ai[i] + 1; vj1=vj;
2058         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2059         sum = t[i];
2060         while (nz1--) sum -= (*v1++)*x[*vj1++];
2061         x[i] = (1-omega)*x[i] + omega*sum/d;
2062         while (nz--) t[*vj++] -= x[i]*(*v++);
2063         PetscLogFlops(4*nz-2);
2064       }
2065     }
2066 
2067   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2068       /*
2069        backward sweep:
2070        b = b - x[i]*U^T(i,:), i=0,...,n-2
2071        for i=m-1,...,0:
2072          sum[i] = (b[i] - U(i,:)x )/d[i];
2073          x[i]   = (1-omega)x[i] + omega*sum[i];
2074       */
2075       for (i=0; i<m; i++)
2076         t[i] = b[i];
2077 
2078       for (i=0; i<m-1; i++){  /* update rhs */
2079         v  = aa + ai[i] + 1;
2080         vj = aj + ai[i] + 1;
2081         nz = ai[i+1] - ai[i] - 1;
2082         while (nz--) t[*vj++] -= x[i]*(*v++);
2083         PetscLogFlops(2*nz-1);
2084       }
2085       for (i=m-1; i>=0; i--){
2086         d  = *(aa + ai[i]);
2087         v  = aa + ai[i] + 1;
2088         vj = aj + ai[i] + 1;
2089         nz = ai[i+1] - ai[i] - 1;
2090         sum = t[i];
2091         while (nz--) sum -= x[*vj++]*(*v++);
2092         PetscLogFlops(2*nz-1);
2093         x[i] =   (1-omega)*x[i] + omega*sum/d;
2094       }
2095     }
2096   }
2097 
2098   ierr = PetscFree(t);CHKERRQ(ierr);
2099   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2100   if (bb != xx) {
2101     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2102   }
2103 
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 
2108 
2109 
2110 
2111 
2112