xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 2ca5cae0e13a3030e66dfe4e3774e6d1dcb097fa)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   PetscErrorCode ierr;
204   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
205   MatScalar    *aa,*aa_i;
206   PetscScalar  *v_i;
207 
208   PetscFunctionBegin;
209   bs  = a->bs;
210   ai  = a->i;
211   aj  = a->j;
212   aa  = a->a;
213   bs2 = a->bs2;
214 
215   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
216 
217   bn  = row/bs;   /* Block number */
218   bp  = row % bs; /* Block position */
219   M   = ai[bn+1] - ai[bn];
220   *ncols = bs*M;
221 
222   if (v) {
223     *v = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         v_i  = *v + i*bs;
228         aa_i = aa + bs2*(ai[bn] + i);
229         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
230       }
231     }
232   }
233 
234   if (cols) {
235     *cols = 0;
236     if (*ncols) {
237       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
238       for (i=0; i<M; i++) { /* for each block in the block row */
239         cols_i = *cols + i*bs;
240         itmp  = bs*aj[ai[bn] + i];
241         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
242       }
243     }
244   }
245 
246   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
247   /* this segment is currently removed, so only entries in the upper triangle are obtained */
248 #ifdef column_search
249   v_i    = *v    + M*bs;
250   cols_i = *cols + M*bs;
251   for (i=0; i<bn; i++){ /* for each block row */
252     M = ai[i+1] - ai[i];
253     for (j=0; j<M; j++){
254       itmp = aj[ai[i] + j];    /* block column value */
255       if (itmp == bn){
256         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
257         for (k=0; k<bs; k++) {
258           *cols_i++ = i*bs+k;
259           *v_i++    = aa_i[k];
260         }
261         *ncols += bs;
262         break;
263       }
264     }
265   }
266 #endif
267 
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
274 {
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
279   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
286 {
287   PetscErrorCode ierr;
288   PetscFunctionBegin;
289   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
296 {
297   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
298   PetscErrorCode ierr;
299   int i,j,bs = a->bs,k,l,bs2=a->bs2;
300   char              *name;
301   PetscViewerFormat format;
302 
303   PetscFunctionBegin;
304   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
305   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
306   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
307     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
308   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
309     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
310   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
311     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
312     for (i=0; i<a->mbs; i++) {
313       for (j=0; j<bs; j++) {
314         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
315         for (k=a->i[i]; k<a->i[i+1]; k++) {
316           for (l=0; l<bs; l++) {
317 #if defined(PETSC_USE_COMPLEX)
318             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
320                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
321             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
322               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
323                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
325               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
326             }
327 #else
328             if (a->a[bs2*k + l*bs + j] != 0.0) {
329               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
330             }
331 #endif
332           }
333         }
334         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
335       }
336     }
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
338   } else {
339     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
340     for (i=0; i<a->mbs; i++) {
341       for (j=0; j<bs; j++) {
342         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
343         for (k=a->i[i]; k<a->i[i+1]; k++) {
344           for (l=0; l<bs; l++) {
345 #if defined(PETSC_USE_COMPLEX)
346             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
347               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
348                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
350               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
351                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             } else {
353               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
354             }
355 #else
356             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
357 #endif
358           }
359         }
360         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
361       }
362     }
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
364   }
365   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
372 {
373   Mat          A = (Mat) Aa;
374   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
375   PetscErrorCode ierr;
376   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
377   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
378   MatScalar    *aa;
379   MPI_Comm     comm;
380   PetscViewer  viewer;
381 
382   PetscFunctionBegin;
383   /*
384       This is nasty. If this is called from an originally parallel matrix
385    then all processes call this,but only the first has the matrix so the
386    rest should return immediately.
387   */
388   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
389   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
390   if (rank) PetscFunctionReturn(0);
391 
392   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
393 
394   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
395   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
396 
397   /* loop over matrix elements drawing boxes */
398   color = PETSC_DRAW_BLUE;
399   for (i=0,row=0; i<mbs; i++,row+=bs) {
400     for (j=a->i[i]; j<a->i[i+1]; j++) {
401       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
402       x_l = a->j[j]*bs; x_r = x_l + 1.0;
403       aa = a->a + j*bs2;
404       for (k=0; k<bs; k++) {
405         for (l=0; l<bs; l++) {
406           if (PetscRealPart(*aa++) >=  0.) continue;
407           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
408         }
409       }
410     }
411   }
412   color = PETSC_DRAW_CYAN;
413   for (i=0,row=0; i<mbs; i++,row+=bs) {
414     for (j=a->i[i]; j<a->i[i+1]; j++) {
415       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
416       x_l = a->j[j]*bs; x_r = x_l + 1.0;
417       aa = a->a + j*bs2;
418       for (k=0; k<bs; k++) {
419         for (l=0; l<bs; l++) {
420           if (PetscRealPart(*aa++) != 0.) continue;
421           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
422         }
423       }
424     }
425   }
426 
427   color = PETSC_DRAW_RED;
428   for (i=0,row=0; i<mbs; i++,row+=bs) {
429     for (j=a->i[i]; j<a->i[i+1]; j++) {
430       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
431       x_l = a->j[j]*bs; x_r = x_l + 1.0;
432       aa = a->a + j*bs2;
433       for (k=0; k<bs; k++) {
434         for (l=0; l<bs; l++) {
435           if (PetscRealPart(*aa++) <= 0.) continue;
436           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
437         }
438       }
439     }
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
447 {
448   PetscErrorCode ierr;
449   PetscReal    xl,yl,xr,yr,w,h;
450   PetscDraw    draw;
451   PetscTruth   isnull;
452 
453   PetscFunctionBegin;
454 
455   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
456   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
457 
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
459   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
460   xr += w;    yr += h;  xl = -w;     yl = -h;
461   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
462   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
463   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatView_SeqSBAIJ"
469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
470 {
471   PetscErrorCode ierr;
472   PetscTruth iascii,isdraw;
473 
474   PetscFunctionBegin;
475   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
476   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
477   if (iascii){
478     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
479   } else if (isdraw) {
480     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
481   } else {
482     Mat B;
483     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
484     ierr = MatView(B,viewer);CHKERRQ(ierr);
485     ierr = MatDestroy(B);CHKERRQ(ierr);
486   }
487   PetscFunctionReturn(0);
488 }
489 
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
494 {
495   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
496   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
497   int        *ai = a->i,*ailen = a->ilen;
498   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
499   MatScalar  *ap,*aa = a->a,zero = 0.0;
500 
501   PetscFunctionBegin;
502   for (k=0; k<m; k++) { /* loop over rows */
503     row  = im[k]; brow = row/bs;
504     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
505     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
506     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
507     nrow = ailen[brow];
508     for (l=0; l<n; l++) { /* loop over columns */
509       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
510       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
511       col  = in[l] ;
512       bcol = col/bs;
513       cidx = col%bs;
514       ridx = row%bs;
515       high = nrow;
516       low  = 0; /* assume unsorted */
517       while (high-low > 5) {
518         t = (low+high)/2;
519         if (rp[t] > bcol) high = t;
520         else             low  = t;
521       }
522       for (i=low; i<high; i++) {
523         if (rp[i] > bcol) break;
524         if (rp[i] == bcol) {
525           *v++ = ap[bs2*i+bs*cidx+ridx];
526           goto finished;
527         }
528       }
529       *v++ = zero;
530       finished:;
531     }
532   }
533   PetscFunctionReturn(0);
534 }
535 
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
540 {
541   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
542   PetscErrorCode ierr;
543   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
544   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
545   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
546   PetscTruth      roworiented=a->roworiented;
547   const MatScalar *value = v;
548   MatScalar       *ap,*aa = a->a,*bap;
549 
550   PetscFunctionBegin;
551   if (roworiented) {
552     stepval = (n-1)*bs;
553   } else {
554     stepval = (m-1)*bs;
555   }
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     if (row < 0) continue;
559 #if defined(PETSC_USE_BOPT_g)
560     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
561 #endif
562     rp   = aj + ai[row];
563     ap   = aa + bs2*ai[row];
564     rmax = imax[row];
565     nrow = ailen[row];
566     low  = 0;
567     for (l=0; l<n; l++) { /* loop over added columns */
568       if (in[l] < 0) continue;
569       col = in[l];
570 #if defined(PETSC_USE_BOPT_g)
571       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
572 #endif
573       if (col < row) continue; /* ignore lower triangular block */
574       if (roworiented) {
575         value = v + k*(stepval+bs)*bs + l*bs;
576       } else {
577         value = v + l*(stepval+bs)*bs + k*bs;
578       }
579       if (!sorted) low = 0; high = nrow;
580       while (high-low > 7) {
581         t = (low+high)/2;
582         if (rp[t] > col) high = t;
583         else             low  = t;
584       }
585       for (i=low; i<high; i++) {
586         if (rp[i] > col) break;
587         if (rp[i] == col) {
588           bap  = ap +  bs2*i;
589           if (roworiented) {
590             if (is == ADD_VALUES) {
591               for (ii=0; ii<bs; ii++,value+=stepval) {
592                 for (jj=ii; jj<bs2; jj+=bs) {
593                   bap[jj] += *value++;
594                 }
595               }
596             } else {
597               for (ii=0; ii<bs; ii++,value+=stepval) {
598                 for (jj=ii; jj<bs2; jj+=bs) {
599                   bap[jj] = *value++;
600                 }
601               }
602             }
603           } else {
604             if (is == ADD_VALUES) {
605               for (ii=0; ii<bs; ii++,value+=stepval) {
606                 for (jj=0; jj<bs; jj++) {
607                   *bap++ += *value++;
608                 }
609               }
610             } else {
611               for (ii=0; ii<bs; ii++,value+=stepval) {
612                 for (jj=0; jj<bs; jj++) {
613                   *bap++  = *value++;
614                 }
615               }
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       if (nonew == 1) goto noinsert2;
622       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
623       if (nrow >= rmax) {
624         /* there is no extra room in row, therefore enlarge */
625         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
626         MatScalar *new_a;
627 
628         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
629 
630         /* malloc new storage space */
631         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
632 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
633         new_j   = (int*)(new_a + bs2*new_nz);
634         new_i   = new_j + new_nz;
635 
636         /* copy over old data into new slots */
637         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
638         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
639         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
640         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
641         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
642         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
643         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
644         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
645         /* free up old matrix storage */
646         ierr = PetscFree(a->a);CHKERRQ(ierr);
647         if (!a->singlemalloc) {
648           ierr = PetscFree(a->i);CHKERRQ(ierr);
649           ierr = PetscFree(a->j);CHKERRQ(ierr);
650         }
651         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
652         a->singlemalloc = PETSC_TRUE;
653 
654         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
655         rmax = imax[row] = imax[row] + CHUNKSIZE;
656         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
657         a->maxnz += bs2*CHUNKSIZE;
658         a->reallocs++;
659         a->nz++;
660       }
661       N = nrow++ - 1;
662       /* shift up all the later entries in this row */
663       for (ii=N; ii>=i; ii--) {
664         rp[ii+1] = rp[ii];
665         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
666       }
667       if (N >= i) {
668         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
669       }
670       rp[i] = col;
671       bap   = ap +  bs2*i;
672       if (roworiented) {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=ii; jj<bs2; jj+=bs) {
675             bap[jj] = *value++;
676           }
677         }
678       } else {
679         for (ii=0; ii<bs; ii++,value+=stepval) {
680           for (jj=0; jj<bs; jj++) {
681             *bap++  = *value++;
682           }
683         }
684       }
685       noinsert2:;
686       low = i;
687     }
688     ailen[row] = nrow;
689   }
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
696 {
697   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
698   PetscErrorCode ierr;
699   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
700   int        m = A->m,*ip,N,*ailen = a->ilen;
701   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
702   MatScalar  *aa = a->a,*ap;
703 
704   PetscFunctionBegin;
705   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
706 
707   if (m) rmax = ailen[0];
708   for (i=1; i<mbs; i++) {
709     /* move each row back by the amount of empty slots (fshift) before it*/
710     fshift += imax[i-1] - ailen[i-1];
711     rmax   = PetscMax(rmax,ailen[i]);
712     if (fshift) {
713       ip = aj + ai[i]; ap = aa + bs2*ai[i];
714       N = ailen[i];
715       for (j=0; j<N; j++) {
716         ip[j-fshift] = ip[j];
717         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719     }
720     ai[i] = ai[i-1] + ailen[i-1];
721   }
722   if (mbs) {
723     fshift += imax[mbs-1] - ailen[mbs-1];
724     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
725   }
726   /* reset ilen and imax for each row */
727   for (i=0; i<mbs; i++) {
728     ailen[i] = imax[i] = ai[i+1] - ai[i];
729   }
730   a->nz = ai[mbs];
731 
732   /* diagonals may have moved, reset it */
733   if (a->diag) {
734     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
735   }
736   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
737            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
738   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
739            a->reallocs);
740   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
741   a->reallocs          = 0;
742   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
743 
744   PetscFunctionReturn(0);
745 }
746 
747 /*
748    This function returns an array of flags which indicate the locations of contiguous
749    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
750    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
751    Assume: sizes should be long enough to hold all the values.
752 */
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
756 {
757   int        i,j,k,row;
758   PetscTruth flg;
759 
760   PetscFunctionBegin;
761   for (i=0,j=0; i<n; j++) {
762     row = idx[i];
763     if (row%bs!=0) { /* Not the begining of a block */
764       sizes[j] = 1;
765       i++;
766     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
767       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
768       i++;
769     } else { /* Begining of the block, so check if the complete block exists */
770       flg = PETSC_TRUE;
771       for (k=1; k<bs; k++) {
772         if (row+k != idx[i+k]) { /* break in the block */
773           flg = PETSC_FALSE;
774           break;
775         }
776       }
777       if (flg == PETSC_TRUE) { /* No break in the bs */
778         sizes[j] = bs;
779         i+= bs;
780       } else {
781         sizes[j] = 1;
782         i++;
783       }
784     }
785   }
786   *bs_max = j;
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
793 {
794   PetscFunctionBegin;
795   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
796 }
797 
798 /* Only add/insert a(i,j) with i<=j (blocks).
799    Any a(i,j) with i>j input by user is ingored.
800 */
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
805 {
806   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
807   PetscErrorCode ierr;
808   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
809   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
810   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
811   int         ridx,cidx,bs2=a->bs2;
812   MatScalar   *ap,value,*aa=a->a,*bap;
813 
814   PetscFunctionBegin;
815 
816   for (k=0; k<m; k++) { /* loop over added rows */
817     row  = im[k];       /* row number */
818     brow = row/bs;      /* block row number */
819     if (row < 0) continue;
820 #if defined(PETSC_USE_BOPT_g)
821     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
822 #endif
823     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
824     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
825     rmax = imax[brow];  /* maximum space allocated for this row */
826     nrow = ailen[brow]; /* actual length of this row */
827     low  = 0;
828 
829     for (l=0; l<n; l++) { /* loop over added columns */
830       if (in[l] < 0) continue;
831 #if defined(PETSC_USE_BOPT_g)
832       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
833 #endif
834       col = in[l];
835       bcol = col/bs;              /* block col number */
836 
837       if (brow <= bcol){
838         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
839         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
840           /* element value a(k,l) */
841           if (roworiented) {
842             value = v[l + k*n];
843           } else {
844             value = v[k + l*m];
845           }
846 
847           /* move pointer bap to a(k,l) quickly and add/insert value */
848           if (!sorted) low = 0; high = nrow;
849           while (high-low > 7) {
850             t = (low+high)/2;
851             if (rp[t] > bcol) high = t;
852             else              low  = t;
853           }
854           for (i=low; i<high; i++) {
855             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
856             if (rp[i] > bcol) break;
857             if (rp[i] == bcol) {
858               bap  = ap +  bs2*i + bs*cidx + ridx;
859               if (is == ADD_VALUES) *bap += value;
860               else                  *bap  = value;
861               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
862               if (brow == bcol && ridx < cidx){
863                 bap  = ap +  bs2*i + bs*ridx + cidx;
864                 if (is == ADD_VALUES) *bap += value;
865                 else                  *bap  = value;
866               }
867               goto noinsert1;
868             }
869           }
870 
871       if (nonew == 1) goto noinsert1;
872       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873       if (nrow >= rmax) {
874         /* there is no extra room in row, therefore enlarge */
875         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
876         MatScalar *new_a;
877 
878         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
879 
880         /* Malloc new storage space */
881         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
882         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
883         new_j = (int*)(new_a + bs2*new_nz);
884         new_i = new_j + new_nz;
885 
886         /* copy over old data into new slots */
887         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
888         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
889         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
890         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
891         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
892         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
893         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
894         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
895         /* free up old matrix storage */
896         ierr = PetscFree(a->a);CHKERRQ(ierr);
897         if (!a->singlemalloc) {
898           ierr = PetscFree(a->i);CHKERRQ(ierr);
899           ierr = PetscFree(a->j);CHKERRQ(ierr);
900         }
901         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
902         a->singlemalloc = PETSC_TRUE;
903 
904         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
905         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
906         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
907         a->maxnz += bs2*CHUNKSIZE;
908         a->reallocs++;
909         a->nz++;
910       }
911 
912       N = nrow++ - 1;
913       /* shift up all the later entries in this row */
914       for (ii=N; ii>=i; ii--) {
915         rp[ii+1] = rp[ii];
916         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
917       }
918       if (N>=i) {
919         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
920       }
921       rp[i]                      = bcol;
922       ap[bs2*i + bs*cidx + ridx] = value;
923       noinsert1:;
924       low = i;
925       /* } */
926         }
927       } /* end of if .. if..  */
928     }                     /* end of loop over added columns */
929     ailen[brow] = nrow;
930   }                       /* end of loop over added rows */
931 
932   PetscFunctionReturn(0);
933 }
934 
935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
950 
951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
966 
967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
968 
969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
977 
978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
987 
988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
996 
997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1005 
1006 #ifdef HAVE_MatICCFactor
1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1011 {
1012   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1013   Mat         outA;
1014   PetscErrorCode ierr;
1015   PetscTruth  row_identity,col_identity;
1016 
1017   PetscFunctionBegin;
1018   /*
1019   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1020   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1021   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1022   if (!row_identity || !col_identity) {
1023     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1024   }
1025   */
1026 
1027   outA          = inA;
1028   inA->factor   = FACTOR_CHOLESKY;
1029 
1030   if (!a->diag) {
1031     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1032   }
1033   /*
1034       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1035       for ILU(0) factorization with natural ordering
1036   */
1037   switch (a->bs) {
1038   case 1:
1039     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1040     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1041     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1042     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1043   case 2:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1048     break;
1049   case 3:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1054     break;
1055   case 4:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1060     break;
1061   case 5:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1066     break;
1067   case 6:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1069     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1070     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1072     break;
1073   case 7:
1074     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1075     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1076     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1077     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1078     break;
1079   default:
1080     a->row        = row;
1081     a->icol       = col;
1082     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1083     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1084 
1085     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1086     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1087     PetscLogObjectParent(inA,a->icol);
1088 
1089     if (!a->solve_work) {
1090       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1091       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1092     }
1093   }
1094 
1095   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 #endif
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1104 {
1105   static PetscTruth called = PETSC_FALSE;
1106   MPI_Comm          comm = A->comm;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1111   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1112   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 EXTERN_C_BEGIN
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1120 {
1121   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1122   int         i,nz,n;
1123 
1124   PetscFunctionBegin;
1125   nz = baij->maxnz;
1126   n  = mat->n;
1127   for (i=0; i<nz; i++) {
1128     baij->j[i] = indices[i];
1129   }
1130   baij->nz = nz;
1131   for (i=0; i<n; i++) {
1132     baij->ilen[i] = baij->imax[i];
1133   }
1134 
1135   PetscFunctionReturn(0);
1136 }
1137 EXTERN_C_END
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1141 /*@
1142     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1143        in the matrix.
1144 
1145   Input Parameters:
1146 +  mat     - the SeqSBAIJ matrix
1147 -  indices - the column indices
1148 
1149   Level: advanced
1150 
1151   Notes:
1152     This can be called if you have precomputed the nonzero structure of the
1153   matrix and want to provide it to the matrix object to improve the performance
1154   of the MatSetValues() operation.
1155 
1156     You MUST have set the correct numbers of nonzeros per row in the call to
1157   MatCreateSeqSBAIJ().
1158 
1159     MUST be called before any calls to MatSetValues()
1160 
1161 .seealso: MatCreateSeqSBAIJ
1162 @*/
1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1164 {
1165   PetscErrorCode ierr,(*f)(Mat,int *);
1166 
1167   PetscFunctionBegin;
1168   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1169   PetscValidPointer(indices,2);
1170   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1171   if (f) {
1172     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1173   } else {
1174     SETERRQ(1,"Wrong type of matrix to set column indices");
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1182 {
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1193 {
1194   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1195   PetscFunctionBegin;
1196   *array = a->a;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1203 {
1204   PetscFunctionBegin;
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #include "petscblaslapack.h"
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1212 {
1213   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1214   PetscErrorCode ierr;
1215   int            i,bs=y->bs,bs2,j;
1216   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1217 
1218   PetscFunctionBegin;
1219   if (str == SAME_NONZERO_PATTERN) {
1220     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1221   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1222     if (y->xtoy && y->XtoY != X) {
1223       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1224       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1225     }
1226     if (!y->xtoy) { /* get xtoy */
1227       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1228       y->XtoY = X;
1229     }
1230     bs2 = bs*bs;
1231     for (i=0; i<x->nz; i++) {
1232       j = 0;
1233       while (j < bs2){
1234         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1235         j++;
1236       }
1237     }
1238     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1239   } else {
1240     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1247 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1248 {
1249   PetscFunctionBegin;
1250   *flg = PETSC_TRUE;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1256 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1257 {
1258   PetscFunctionBegin;
1259   *flg = PETSC_TRUE;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1265 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1266 {
1267   PetscFunctionBegin;
1268   *flg = PETSC_FALSE;
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 /* -------------------------------------------------------------------*/
1273 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1274        MatGetRow_SeqSBAIJ,
1275        MatRestoreRow_SeqSBAIJ,
1276        MatMult_SeqSBAIJ_N,
1277 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1278        MatMultTranspose_SeqSBAIJ,
1279        MatMultTransposeAdd_SeqSBAIJ,
1280        MatSolve_SeqSBAIJ_N,
1281        0,
1282        0,
1283 /*10*/ 0,
1284        0,
1285        MatCholeskyFactor_SeqSBAIJ,
1286        MatRelax_SeqSBAIJ,
1287        MatTranspose_SeqSBAIJ,
1288 /*15*/ MatGetInfo_SeqSBAIJ,
1289        MatEqual_SeqSBAIJ,
1290        MatGetDiagonal_SeqSBAIJ,
1291        MatDiagonalScale_SeqSBAIJ,
1292        MatNorm_SeqSBAIJ,
1293 /*20*/ 0,
1294        MatAssemblyEnd_SeqSBAIJ,
1295        0,
1296        MatSetOption_SeqSBAIJ,
1297        MatZeroEntries_SeqSBAIJ,
1298 /*25*/ MatZeroRows_SeqSBAIJ,
1299        0,
1300        0,
1301        MatCholeskyFactorSymbolic_SeqSBAIJ,
1302        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1303 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1304        0,
1305        MatICCFactorSymbolic_SeqSBAIJ,
1306        MatGetArray_SeqSBAIJ,
1307        MatRestoreArray_SeqSBAIJ,
1308 /*35*/ MatDuplicate_SeqSBAIJ,
1309        0,
1310        0,
1311        0,
1312        0,
1313 /*40*/ MatAXPY_SeqSBAIJ,
1314        MatGetSubMatrices_SeqSBAIJ,
1315        MatIncreaseOverlap_SeqSBAIJ,
1316        MatGetValues_SeqSBAIJ,
1317        0,
1318 /*45*/ MatPrintHelp_SeqSBAIJ,
1319        MatScale_SeqSBAIJ,
1320        0,
1321        0,
1322        0,
1323 /*50*/ MatGetBlockSize_SeqSBAIJ,
1324        MatGetRowIJ_SeqSBAIJ,
1325        MatRestoreRowIJ_SeqSBAIJ,
1326        0,
1327        0,
1328 /*55*/ 0,
1329        0,
1330        0,
1331        0,
1332        MatSetValuesBlocked_SeqSBAIJ,
1333 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1334        0,
1335        0,
1336        MatGetPetscMaps_Petsc,
1337        0,
1338 /*65*/ 0,
1339        0,
1340        0,
1341        0,
1342        0,
1343 /*70*/ MatGetRowMax_SeqSBAIJ,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*75*/ 0,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*80*/ 0,
1354        0,
1355        0,
1356 #if !defined(PETSC_USE_COMPLEX)
1357        MatGetInertia_SeqSBAIJ,
1358 #else
1359        0,
1360 #endif
1361 /*84*/ MatLoad_SeqSBAIJ,
1362        MatIsSymmetric_SeqSBAIJ,
1363        MatIsStructurallySymmetric_SeqSBAIJ,
1364        MatIsHermitian_SeqSBAIJ
1365 };
1366 
1367 EXTERN_C_BEGIN
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1370 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1371 {
1372   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1373   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   if (aij->nonew != 1) {
1378     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1379   }
1380 
1381   /* allocate space for values if not already there */
1382   if (!aij->saved_values) {
1383     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1384   }
1385 
1386   /* copy values over */
1387   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 EXTERN_C_END
1391 
1392 EXTERN_C_BEGIN
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1395 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1396 {
1397   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1398   PetscErrorCode ierr;
1399   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1400 
1401   PetscFunctionBegin;
1402   if (aij->nonew != 1) {
1403     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1404   }
1405   if (!aij->saved_values) {
1406     SETERRQ(1,"Must call MatStoreValues(A);first");
1407   }
1408 
1409   /* copy values over */
1410   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 EXTERN_C_END
1414 
1415 EXTERN_C_BEGIN
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1418 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1419 {
1420   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1421   PetscErrorCode ierr;
1422   int          i,len,mbs,bs2;
1423   PetscTruth   flg;
1424 
1425   PetscFunctionBegin;
1426   B->preallocated = PETSC_TRUE;
1427   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1428   mbs  = B->m/bs;
1429   bs2  = bs*bs;
1430 
1431   if (mbs*bs != B->m) {
1432     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1433   }
1434 
1435   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1436   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1437   if (nnz) {
1438     for (i=0; i<mbs; i++) {
1439       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1440       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1441     }
1442   }
1443 
1444   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1445   if (!flg) {
1446     switch (bs) {
1447     case 1:
1448       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1449       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1450       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1451       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1452       B->ops->mult            = MatMult_SeqSBAIJ_1;
1453       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1454       break;
1455     case 2:
1456       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1457       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1458       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1459       B->ops->mult            = MatMult_SeqSBAIJ_2;
1460       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1461       break;
1462     case 3:
1463       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1464       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1465       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1466       B->ops->mult            = MatMult_SeqSBAIJ_3;
1467       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1468       break;
1469     case 4:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1471       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1472       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1473       B->ops->mult            = MatMult_SeqSBAIJ_4;
1474       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1475       break;
1476     case 5:
1477       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1478       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1479       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1480       B->ops->mult            = MatMult_SeqSBAIJ_5;
1481       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1482       break;
1483     case 6:
1484       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1485       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1486       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1487       B->ops->mult            = MatMult_SeqSBAIJ_6;
1488       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1489       break;
1490     case 7:
1491       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1492       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1493       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1494       B->ops->mult            = MatMult_SeqSBAIJ_7;
1495       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1496       break;
1497     }
1498   }
1499 
1500   b->mbs = mbs;
1501   b->nbs = mbs;
1502   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1503   if (!nnz) {
1504     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1505     else if (nz <= 0)        nz = 1;
1506     for (i=0; i<mbs; i++) {
1507       b->imax[i] = nz;
1508     }
1509     nz = nz*mbs; /* total nz */
1510   } else {
1511     nz = 0;
1512     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1513   }
1514   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1515 
1516   /* allocate the matrix space */
1517   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1518   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1519   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1520   b->j = (int*)(b->a + nz*bs2);
1521   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1522   b->i = b->j + nz;
1523   b->singlemalloc = PETSC_TRUE;
1524 
1525   /* pointer to beginning of each row */
1526   b->i[0] = 0;
1527   for (i=1; i<mbs+1; i++) {
1528     b->i[i] = b->i[i-1] + b->imax[i-1];
1529   }
1530 
1531   /* b->ilen will count nonzeros in each block row so far. */
1532   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1533   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1534   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1535 
1536   b->bs               = bs;
1537   b->bs2              = bs2;
1538   b->nz             = 0;
1539   b->maxnz          = nz*bs2;
1540 
1541   b->inew             = 0;
1542   b->jnew             = 0;
1543   b->anew             = 0;
1544   b->a2anew           = 0;
1545   b->permute          = PETSC_FALSE;
1546   PetscFunctionReturn(0);
1547 }
1548 EXTERN_C_END
1549 
1550 EXTERN_C_BEGIN
1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1552 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1553 EXTERN_C_END
1554 
1555 /*MC
1556    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1557    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1558 
1559    Options Database Keys:
1560 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1561 
1562   Level: beginner
1563 
1564 .seealso: MatCreateSeqSBAIJ
1565 M*/
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1570 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1571 {
1572   Mat_SeqSBAIJ *b;
1573   PetscErrorCode ierr;
1574   int size;
1575 
1576   PetscFunctionBegin;
1577   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1578   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1579   B->m = B->M = PetscMax(B->m,B->M);
1580   B->n = B->N = PetscMax(B->n,B->N);
1581 
1582   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1583   B->data = (void*)b;
1584   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1585   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1586   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1587   B->ops->view        = MatView_SeqSBAIJ;
1588   B->factor           = 0;
1589   B->lupivotthreshold = 1.0;
1590   B->mapping          = 0;
1591   b->row              = 0;
1592   b->icol             = 0;
1593   b->reallocs         = 0;
1594   b->saved_values     = 0;
1595 
1596   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1597   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1598 
1599   b->sorted           = PETSC_FALSE;
1600   b->roworiented      = PETSC_TRUE;
1601   b->nonew            = 0;
1602   b->diag             = 0;
1603   b->solve_work       = 0;
1604   b->mult_work        = 0;
1605   B->spptr            = 0;
1606   b->keepzeroedrows   = PETSC_FALSE;
1607   b->xtoy             = 0;
1608   b->XtoY             = 0;
1609 
1610   b->inew             = 0;
1611   b->jnew             = 0;
1612   b->anew             = 0;
1613   b->a2anew           = 0;
1614   b->permute          = PETSC_FALSE;
1615 
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1617                                      "MatStoreValues_SeqSBAIJ",
1618                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1620                                      "MatRetrieveValues_SeqSBAIJ",
1621                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1623                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1624                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1626                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1627                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1629                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1630                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1632                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1633                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1634 
1635   B->symmetric                  = PETSC_TRUE;
1636   B->structurally_symmetric     = PETSC_TRUE;
1637   B->symmetric_set              = PETSC_TRUE;
1638   B->structurally_symmetric_set = PETSC_TRUE;
1639   PetscFunctionReturn(0);
1640 }
1641 EXTERN_C_END
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1645 /*@C
1646    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1647    compressed row) format.  For good matrix assembly performance the
1648    user should preallocate the matrix storage by setting the parameter nz
1649    (or the array nnz).  By setting these parameters accurately, performance
1650    during matrix assembly can be increased by more than a factor of 50.
1651 
1652    Collective on Mat
1653 
1654    Input Parameters:
1655 +  A - the symmetric matrix
1656 .  bs - size of block
1657 .  nz - number of block nonzeros per block row (same for all rows)
1658 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1659          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1660 
1661    Options Database Keys:
1662 .   -mat_no_unroll - uses code that does not unroll the loops in the
1663                      block calculations (much slower)
1664 .    -mat_block_size - size of the blocks to use
1665 
1666    Level: intermediate
1667 
1668    Notes:
1669    Specify the preallocated storage with either nz or nnz (not both).
1670    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1671    allocation.  For additional details, see the users manual chapter on
1672    matrices.
1673 
1674 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1675 @*/
1676 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1677   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1678 
1679   PetscFunctionBegin;
1680   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1681   if (f) {
1682     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatCreateSeqSBAIJ"
1689 /*@C
1690    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1691    compressed row) format.  For good matrix assembly performance the
1692    user should preallocate the matrix storage by setting the parameter nz
1693    (or the array nnz).  By setting these parameters accurately, performance
1694    during matrix assembly can be increased by more than a factor of 50.
1695 
1696    Collective on MPI_Comm
1697 
1698    Input Parameters:
1699 +  comm - MPI communicator, set to PETSC_COMM_SELF
1700 .  bs - size of block
1701 .  m - number of rows, or number of columns
1702 .  nz - number of block nonzeros per block row (same for all rows)
1703 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1704          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1705 
1706    Output Parameter:
1707 .  A - the symmetric matrix
1708 
1709    Options Database Keys:
1710 .   -mat_no_unroll - uses code that does not unroll the loops in the
1711                      block calculations (much slower)
1712 .    -mat_block_size - size of the blocks to use
1713 
1714    Level: intermediate
1715 
1716    Notes:
1717 
1718    Specify the preallocated storage with either nz or nnz (not both).
1719    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1720    allocation.  For additional details, see the users manual chapter on
1721    matrices.
1722 
1723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1724 @*/
1725 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1726 {
1727   PetscErrorCode ierr;
1728 
1729   PetscFunctionBegin;
1730   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1731   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1732   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1738 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1739 {
1740   Mat          C;
1741   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1742   PetscErrorCode ierr;
1743   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1744 
1745   PetscFunctionBegin;
1746   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1747 
1748   *B = 0;
1749   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1750   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1751   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1752   c    = (Mat_SeqSBAIJ*)C->data;
1753 
1754   C->preallocated   = PETSC_TRUE;
1755   C->factor         = A->factor;
1756   c->row            = 0;
1757   c->icol           = 0;
1758   c->saved_values   = 0;
1759   c->keepzeroedrows = a->keepzeroedrows;
1760   C->assembled      = PETSC_TRUE;
1761 
1762   C->M    = A->M;
1763   C->N    = A->N;
1764   c->bs   = a->bs;
1765   c->bs2  = a->bs2;
1766   c->mbs  = a->mbs;
1767   c->nbs  = a->nbs;
1768 
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1770   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1771   for (i=0; i<mbs; i++) {
1772     c->imax[i] = a->imax[i];
1773     c->ilen[i] = a->ilen[i];
1774   }
1775 
1776   /* allocate the matrix space */
1777   c->singlemalloc = PETSC_TRUE;
1778   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1779   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1780   c->j = (int*)(c->a + nz*bs2);
1781   c->i = c->j + nz;
1782   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1783   if (mbs > 0) {
1784     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1785     if (cpvalues == MAT_COPY_VALUES) {
1786       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1787     } else {
1788       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1789     }
1790   }
1791 
1792   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1793   c->sorted      = a->sorted;
1794   c->roworiented = a->roworiented;
1795   c->nonew       = a->nonew;
1796 
1797   if (a->diag) {
1798     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1799     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1800     for (i=0; i<mbs; i++) {
1801       c->diag[i] = a->diag[i];
1802     }
1803   } else c->diag        = 0;
1804   c->nz               = a->nz;
1805   c->maxnz            = a->maxnz;
1806   c->solve_work         = 0;
1807   c->mult_work          = 0;
1808   *B = C;
1809   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1815 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1816 {
1817   Mat_SeqSBAIJ *a;
1818   Mat          B;
1819   PetscErrorCode ierr;
1820   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1821   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1822   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1823   int          *masked,nmask,tmp,bs2,ishift;
1824   PetscScalar  *aa;
1825   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1829   bs2  = bs*bs;
1830 
1831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1832   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1833   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1834   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1835   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1836   M = header[1]; N = header[2]; nz = header[3];
1837 
1838   if (header[3] < 0) {
1839     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1840   }
1841 
1842   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1843 
1844   /*
1845      This code adds extra rows to make sure the number of rows is
1846     divisible by the blocksize
1847   */
1848   mbs        = M/bs;
1849   extra_rows = bs - M + bs*(mbs);
1850   if (extra_rows == bs) extra_rows = 0;
1851   else                  mbs++;
1852   if (extra_rows) {
1853     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1854   }
1855 
1856   /* read in row lengths */
1857   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1858   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1859   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1860 
1861   /* read in column indices */
1862   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1863   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1864   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1865 
1866   /* loop over row lengths determining block row lengths */
1867   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1868   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1869   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1870   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1871   masked   = mask + mbs;
1872   rowcount = 0; nzcount = 0;
1873   for (i=0; i<mbs; i++) {
1874     nmask = 0;
1875     for (j=0; j<bs; j++) {
1876       kmax = rowlengths[rowcount];
1877       for (k=0; k<kmax; k++) {
1878         tmp = jj[nzcount++]/bs;   /* block col. index */
1879         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1880       }
1881       rowcount++;
1882     }
1883     s_browlengths[i] += nmask;
1884 
1885     /* zero out the mask elements we set */
1886     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1887   }
1888 
1889   /* create our matrix */
1890   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1891   ierr = MatSetType(B,type);CHKERRQ(ierr);
1892   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1893   a = (Mat_SeqSBAIJ*)B->data;
1894 
1895   /* set matrix "i" values */
1896   a->i[0] = 0;
1897   for (i=1; i<= mbs; i++) {
1898     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1899     a->ilen[i-1] = s_browlengths[i-1];
1900   }
1901   a->nz = a->i[mbs];
1902 
1903   /* read in nonzero values */
1904   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1905   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1906   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1907 
1908   /* set "a" and "j" values into matrix */
1909   nzcount = 0; jcount = 0;
1910   for (i=0; i<mbs; i++) {
1911     nzcountb = nzcount;
1912     nmask    = 0;
1913     for (j=0; j<bs; j++) {
1914       kmax = rowlengths[i*bs+j];
1915       for (k=0; k<kmax; k++) {
1916         tmp = jj[nzcount++]/bs; /* block col. index */
1917         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1918       }
1919     }
1920     /* sort the masked values */
1921     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1922 
1923     /* set "j" values into matrix */
1924     maskcount = 1;
1925     for (j=0; j<nmask; j++) {
1926       a->j[jcount++]  = masked[j];
1927       mask[masked[j]] = maskcount++;
1928     }
1929 
1930     /* set "a" values into matrix */
1931     ishift = bs2*a->i[i];
1932     for (j=0; j<bs; j++) {
1933       kmax = rowlengths[i*bs+j];
1934       for (k=0; k<kmax; k++) {
1935         tmp       = jj[nzcountb]/bs ; /* block col. index */
1936         if (tmp >= i){
1937           block     = mask[tmp] - 1;
1938           point     = jj[nzcountb] - bs*tmp;
1939           idx       = ishift + bs2*block + j + bs*point;
1940           a->a[idx] = aa[nzcountb];
1941         }
1942         nzcountb++;
1943       }
1944     }
1945     /* zero out the mask elements we set */
1946     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1947   }
1948   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1949 
1950   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1952   ierr = PetscFree(aa);CHKERRQ(ierr);
1953   ierr = PetscFree(jj);CHKERRQ(ierr);
1954   ierr = PetscFree(mask);CHKERRQ(ierr);
1955 
1956   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatView_Private(B);CHKERRQ(ierr);
1959   *A = B;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1965 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1966 {
1967   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1968   MatScalar    *aa=a->a,*v,*v1;
1969   PetscScalar  *x,*b,*t,sum,d;
1970   PetscErrorCode ierr;
1971   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j;
1972   int          nz,nz1,*vj,*vj1,i;
1973 
1974   PetscFunctionBegin;
1975   its = its*lits;
1976   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1977 
1978   if (bs > 1)
1979     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1980 
1981   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1982   if (xx != bb) {
1983     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1984   } else {
1985     b = x;
1986   }
1987 
1988   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1989 
1990   if (flag & SOR_ZERO_INITIAL_GUESS) {
1991     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1992       for (i=0; i<m; i++)
1993         t[i] = b[i];
1994 
1995       for (i=0; i<m; i++){
1996         d  = *(aa + ai[i]);  /* diag[i] */
1997         v  = aa + ai[i] + 1;
1998         vj = aj + ai[i] + 1;
1999         nz = ai[i+1] - ai[i] - 1;
2000         x[i] = omega*t[i]/d;
2001         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2002         PetscLogFlops(2*nz-1);
2003       }
2004     }
2005 
2006     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2007       for (i=0; i<m; i++)
2008         t[i] = b[i];
2009 
2010       for (i=0; i<m-1; i++){  /* update rhs */
2011         v  = aa + ai[i] + 1;
2012         vj = aj + ai[i] + 1;
2013         nz = ai[i+1] - ai[i] - 1;
2014         while (nz--) t[*vj++] -= x[i]*(*v++);
2015         PetscLogFlops(2*nz-1);
2016       }
2017       for (i=m-1; i>=0; i--){
2018         d  = *(aa + ai[i]);
2019         v  = aa + ai[i] + 1;
2020         vj = aj + ai[i] + 1;
2021         nz = ai[i+1] - ai[i] - 1;
2022         sum = t[i];
2023         while (nz--) sum -= x[*vj++]*(*v++);
2024         PetscLogFlops(2*nz-1);
2025         x[i] =   (1-omega)*x[i] + omega*sum/d;
2026       }
2027     }
2028     its--;
2029   }
2030 
2031   while (its--) {
2032     /*
2033        forward sweep:
2034        for i=0,...,m-1:
2035          sum[i] = (b[i] - U(i,:)x )/d[i];
2036          x[i]   = (1-omega)x[i] + omega*sum[i];
2037          b      = b - x[i]*U^T(i,:);
2038 
2039     */
2040     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2041       for (i=0; i<m; i++)
2042         t[i] = b[i];
2043 
2044       for (i=0; i<m; i++){
2045         d  = *(aa + ai[i]);  /* diag[i] */
2046         v  = aa + ai[i] + 1; v1=v;
2047         vj = aj + ai[i] + 1; vj1=vj;
2048         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2049         sum = t[i];
2050         while (nz1--) sum -= (*v1++)*x[*vj1++];
2051         x[i] = (1-omega)*x[i] + omega*sum/d;
2052         while (nz--) t[*vj++] -= x[i]*(*v++);
2053         PetscLogFlops(4*nz-2);
2054       }
2055     }
2056 
2057   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2058       /*
2059        backward sweep:
2060        b = b - x[i]*U^T(i,:), i=0,...,n-2
2061        for i=m-1,...,0:
2062          sum[i] = (b[i] - U(i,:)x )/d[i];
2063          x[i]   = (1-omega)x[i] + omega*sum[i];
2064       */
2065       for (i=0; i<m; i++)
2066         t[i] = b[i];
2067 
2068       for (i=0; i<m-1; i++){  /* update rhs */
2069         v  = aa + ai[i] + 1;
2070         vj = aj + ai[i] + 1;
2071         nz = ai[i+1] - ai[i] - 1;
2072         while (nz--) t[*vj++] -= x[i]*(*v++);
2073         PetscLogFlops(2*nz-1);
2074       }
2075       for (i=m-1; i>=0; i--){
2076         d  = *(aa + ai[i]);
2077         v  = aa + ai[i] + 1;
2078         vj = aj + ai[i] + 1;
2079         nz = ai[i+1] - ai[i] - 1;
2080         sum = t[i];
2081         while (nz--) sum -= x[*vj++]*(*v++);
2082         PetscLogFlops(2*nz-1);
2083         x[i] =   (1-omega)*x[i] + omega*sum/d;
2084       }
2085     }
2086   }
2087 
2088   ierr = PetscFree(t);CHKERRQ(ierr);
2089   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2090   if (bb != xx) {
2091     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2092   }
2093 
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 
2098 
2099 
2100 
2101 
2102