xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 0be8e73acc056e1817fd33db9a767c1d1a87daa9)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   PetscInt       i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     PetscInt nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   PetscInt     i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     PetscInt nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
93 {
94   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
99   ierr = PetscFree(a->a);CHKERRQ(ierr);
100   if (!a->singlemalloc) {
101     ierr = PetscFree(a->i);CHKERRQ(ierr);
102     ierr = PetscFree(a->j);CHKERRQ(ierr);
103   }
104   if (a->row) {
105     ierr = ISDestroy(a->row);CHKERRQ(ierr);
106   }
107   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
108   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
109   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
110   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
111   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
112   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
113   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
114   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
115   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
116 
117   if (a->inew){
118     ierr = PetscFree(a->inew);CHKERRQ(ierr);
119     a->inew = 0;
120   }
121   ierr = PetscFree(a);CHKERRQ(ierr);
122 
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
125   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
126   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
127   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
128   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
135 {
136   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   switch (op) {
140   case MAT_ROW_ORIENTED:
141     a->roworiented    = PETSC_TRUE;
142     break;
143   case MAT_COLUMN_ORIENTED:
144     a->roworiented    = PETSC_FALSE;
145     break;
146   case MAT_COLUMNS_SORTED:
147     a->sorted         = PETSC_TRUE;
148     break;
149   case MAT_COLUMNS_UNSORTED:
150     a->sorted         = PETSC_FALSE;
151     break;
152   case MAT_KEEP_ZEROED_ROWS:
153     a->keepzeroedrows = PETSC_TRUE;
154     break;
155   case MAT_NO_NEW_NONZERO_LOCATIONS:
156     a->nonew          = 1;
157     break;
158   case MAT_NEW_NONZERO_LOCATION_ERR:
159     a->nonew          = -1;
160     break;
161   case MAT_NEW_NONZERO_ALLOCATION_ERR:
162     a->nonew          = -2;
163     break;
164   case MAT_YES_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 0;
166     break;
167   case MAT_ROWS_SORTED:
168   case MAT_ROWS_UNSORTED:
169   case MAT_YES_NEW_DIAGONALS:
170   case MAT_IGNORE_OFF_PROC_ENTRIES:
171   case MAT_USE_HASH_TABLE:
172     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
173     break;
174   case MAT_NO_NEW_DIAGONALS:
175     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
176   case MAT_NOT_SYMMETRIC:
177   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
178   case MAT_HERMITIAN:
179     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
180   case MAT_SYMMETRIC:
181   case MAT_STRUCTURALLY_SYMMETRIC:
182   case MAT_NOT_HERMITIAN:
183   case MAT_SYMMETRY_ETERNAL:
184   case MAT_NOT_SYMMETRY_ETERNAL:
185     break;
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
197   PetscErrorCode ierr;
198   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
199   MatScalar      *aa,*aa_i;
200   PetscScalar    *v_i;
201 
202   PetscFunctionBegin;
203   bs  = A->bs;
204   ai  = a->i;
205   aj  = a->j;
206   aa  = a->a;
207   bs2 = a->bs2;
208 
209   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
210 
211   bn  = row/bs;   /* Block number */
212   bp  = row % bs; /* Block position */
213   M   = ai[bn+1] - ai[bn];
214   *ncols = bs*M;
215 
216   if (v) {
217     *v = 0;
218     if (*ncols) {
219       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
220       for (i=0; i<M; i++) { /* for each block in the block row */
221         v_i  = *v + i*bs;
222         aa_i = aa + bs2*(ai[bn] + i);
223         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
224       }
225     }
226   }
227 
228   if (cols) {
229     *cols = 0;
230     if (*ncols) {
231       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
232       for (i=0; i<M; i++) { /* for each block in the block row */
233         cols_i = *cols + i*bs;
234         itmp  = bs*aj[ai[bn] + i];
235         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
236       }
237     }
238   }
239 
240   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
241   /* this segment is currently removed, so only entries in the upper triangle are obtained */
242 #ifdef column_search
243   v_i    = *v    + M*bs;
244   cols_i = *cols + M*bs;
245   for (i=0; i<bn; i++){ /* for each block row */
246     M = ai[i+1] - ai[i];
247     for (j=0; j<M; j++){
248       itmp = aj[ai[i] + j];    /* block column value */
249       if (itmp == bn){
250         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
251         for (k=0; k<bs; k++) {
252           *cols_i++ = i*bs+k;
253           *v_i++    = aa_i[k];
254         }
255         *ncols += bs;
256         break;
257       }
258     }
259   }
260 #endif
261 
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNCT__
266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
268 {
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
273   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
280 {
281   PetscErrorCode ierr;
282   PetscFunctionBegin;
283   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
290 {
291   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
292   PetscErrorCode    ierr;
293   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
294   char              *name;
295   PetscViewerFormat format;
296 
297   PetscFunctionBegin;
298   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
299   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
300   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
301     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
302   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
303     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
304   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
305     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
306     for (i=0; i<a->mbs; i++) {
307       for (j=0; j<bs; j++) {
308         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
309         for (k=a->i[i]; k<a->i[i+1]; k++) {
310           for (l=0; l<bs; l++) {
311 #if defined(PETSC_USE_COMPLEX)
312             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
313               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
314                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
315             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
316               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
317                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
318             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
320             }
321 #else
322             if (a->a[bs2*k + l*bs + j] != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
324             }
325 #endif
326           }
327         }
328         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
329       }
330     }
331     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
332   } else {
333     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
334     for (i=0; i<a->mbs; i++) {
335       for (j=0; j<bs; j++) {
336         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
337         for (k=a->i[i]; k<a->i[i+1]; k++) {
338           for (l=0; l<bs; l++) {
339 #if defined(PETSC_USE_COMPLEX)
340             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
341               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
342                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
343             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
344               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
345                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             } else {
347               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
348             }
349 #else
350             ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
351 #endif
352           }
353         }
354         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
355       }
356     }
357     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
358   }
359   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
366 {
367   Mat            A = (Mat) Aa;
368   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
369   PetscErrorCode ierr;
370   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
371   PetscMPIInt    rank;
372   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
373   MatScalar      *aa;
374   MPI_Comm       comm;
375   PetscViewer    viewer;
376 
377   PetscFunctionBegin;
378   /*
379       This is nasty. If this is called from an originally parallel matrix
380    then all processes call this,but only the first has the matrix so the
381    rest should return immediately.
382   */
383   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
384   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
385   if (rank) PetscFunctionReturn(0);
386 
387   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
388 
389   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
390   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
391 
392   /* loop over matrix elements drawing boxes */
393   color = PETSC_DRAW_BLUE;
394   for (i=0,row=0; i<mbs; i++,row+=bs) {
395     for (j=a->i[i]; j<a->i[i+1]; j++) {
396       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
397       x_l = a->j[j]*bs; x_r = x_l + 1.0;
398       aa = a->a + j*bs2;
399       for (k=0; k<bs; k++) {
400         for (l=0; l<bs; l++) {
401           if (PetscRealPart(*aa++) >=  0.) continue;
402           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
403         }
404       }
405     }
406   }
407   color = PETSC_DRAW_CYAN;
408   for (i=0,row=0; i<mbs; i++,row+=bs) {
409     for (j=a->i[i]; j<a->i[i+1]; j++) {
410       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
411       x_l = a->j[j]*bs; x_r = x_l + 1.0;
412       aa = a->a + j*bs2;
413       for (k=0; k<bs; k++) {
414         for (l=0; l<bs; l++) {
415           if (PetscRealPart(*aa++) != 0.) continue;
416           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
417         }
418       }
419     }
420   }
421 
422   color = PETSC_DRAW_RED;
423   for (i=0,row=0; i<mbs; i++,row+=bs) {
424     for (j=a->i[i]; j<a->i[i+1]; j++) {
425       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
426       x_l = a->j[j]*bs; x_r = x_l + 1.0;
427       aa = a->a + j*bs2;
428       for (k=0; k<bs; k++) {
429         for (l=0; l<bs; l++) {
430           if (PetscRealPart(*aa++) <= 0.) continue;
431           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
442 {
443   PetscErrorCode ierr;
444   PetscReal      xl,yl,xr,yr,w,h;
445   PetscDraw      draw;
446   PetscTruth     isnull;
447 
448   PetscFunctionBegin;
449 
450   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
451   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
452 
453   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
454   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
455   xr += w;    yr += h;  xl = -w;     yl = -h;
456   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
457   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatView_SeqSBAIJ"
464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
465 {
466   PetscErrorCode ierr;
467   PetscTruth     iascii,isdraw;
468 
469   PetscFunctionBegin;
470   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
471   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
472   if (iascii){
473     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
474   } else if (isdraw) {
475     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
476   } else {
477     Mat B;
478     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
479     ierr = MatView(B,viewer);CHKERRQ(ierr);
480     ierr = MatDestroy(B);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
489 {
490   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
491   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
492   PetscInt     *ai = a->i,*ailen = a->ilen;
493   PetscInt     brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
494   MatScalar    *ap,*aa = a->a,zero = 0.0;
495 
496   PetscFunctionBegin;
497   for (k=0; k<m; k++) { /* loop over rows */
498     row  = im[k]; brow = row/bs;
499     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
500     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
501     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
502     nrow = ailen[brow];
503     for (l=0; l<n; l++) { /* loop over columns */
504       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
505       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
506       col  = in[l] ;
507       bcol = col/bs;
508       cidx = col%bs;
509       ridx = row%bs;
510       high = nrow;
511       low  = 0; /* assume unsorted */
512       while (high-low > 5) {
513         t = (low+high)/2;
514         if (rp[t] > bcol) high = t;
515         else             low  = t;
516       }
517       for (i=low; i<high; i++) {
518         if (rp[i] > bcol) break;
519         if (rp[i] == bcol) {
520           *v++ = ap[bs2*i+bs*cidx+ridx];
521           goto finished;
522         }
523       }
524       *v++ = zero;
525       finished:;
526     }
527   }
528   PetscFunctionReturn(0);
529 }
530 
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
535 {
536   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
537   PetscErrorCode  ierr;
538   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
539   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
540   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
541   PetscTruth      roworiented=a->roworiented;
542   const MatScalar *value = v;
543   MatScalar       *ap,*aa = a->a,*bap;
544 
545   PetscFunctionBegin;
546   if (roworiented) {
547     stepval = (n-1)*bs;
548   } else {
549     stepval = (m-1)*bs;
550   }
551   for (k=0; k<m; k++) { /* loop over added rows */
552     row  = im[k];
553     if (row < 0) continue;
554 #if defined(PETSC_USE_DEBUG)
555     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
556 #endif
557     rp   = aj + ai[row];
558     ap   = aa + bs2*ai[row];
559     rmax = imax[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       if (in[l] < 0) continue;
564       col = in[l];
565 #if defined(PETSC_USE_DEBUG)
566       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
567 #endif
568       if (col < row) continue; /* ignore lower triangular block */
569       if (roworiented) {
570         value = v + k*(stepval+bs)*bs + l*bs;
571       } else {
572         value = v + l*(stepval+bs)*bs + k*bs;
573       }
574       if (!sorted) low = 0; high = nrow;
575       while (high-low > 7) {
576         t = (low+high)/2;
577         if (rp[t] > col) high = t;
578         else             low  = t;
579       }
580       for (i=low; i<high; i++) {
581         if (rp[i] > col) break;
582         if (rp[i] == col) {
583           bap  = ap +  bs2*i;
584           if (roworiented) {
585             if (is == ADD_VALUES) {
586               for (ii=0; ii<bs; ii++,value+=stepval) {
587                 for (jj=ii; jj<bs2; jj+=bs) {
588                   bap[jj] += *value++;
589                 }
590               }
591             } else {
592               for (ii=0; ii<bs; ii++,value+=stepval) {
593                 for (jj=ii; jj<bs2; jj+=bs) {
594                   bap[jj] = *value++;
595                 }
596               }
597             }
598           } else {
599             if (is == ADD_VALUES) {
600               for (ii=0; ii<bs; ii++,value+=stepval) {
601                 for (jj=0; jj<bs; jj++) {
602                   *bap++ += *value++;
603                 }
604               }
605             } else {
606               for (ii=0; ii<bs; ii++,value+=stepval) {
607                 for (jj=0; jj<bs; jj++) {
608                   *bap++  = *value++;
609                 }
610               }
611             }
612           }
613           goto noinsert2;
614         }
615       }
616       if (nonew == 1) goto noinsert2;
617       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
618       if (nrow >= rmax) {
619         /* there is no extra room in row, therefore enlarge */
620         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
621         MatScalar *new_a;
622 
623         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
624 
625         /* malloc new storage space */
626         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
627 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
628         new_j   = (PetscInt*)(new_a + bs2*new_nz);
629         new_i   = new_j + new_nz;
630 
631         /* copy over old data into new slots */
632         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
633         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
634         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
635         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
636         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
637         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
638         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
640         /* free up old matrix storage */
641         ierr = PetscFree(a->a);CHKERRQ(ierr);
642         if (!a->singlemalloc) {
643           ierr = PetscFree(a->i);CHKERRQ(ierr);
644           ierr = PetscFree(a->j);CHKERRQ(ierr);
645         }
646         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
647         a->singlemalloc = PETSC_TRUE;
648 
649         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
650         rmax = imax[row] = imax[row] + CHUNKSIZE;
651         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
652         a->maxnz += bs2*CHUNKSIZE;
653         a->reallocs++;
654         a->nz++;
655       }
656       N = nrow++ - 1;
657       /* shift up all the later entries in this row */
658       for (ii=N; ii>=i; ii--) {
659         rp[ii+1] = rp[ii];
660         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
661       }
662       if (N >= i) {
663         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
664       }
665       rp[i] = col;
666       bap   = ap +  bs2*i;
667       if (roworiented) {
668         for (ii=0; ii<bs; ii++,value+=stepval) {
669           for (jj=ii; jj<bs2; jj+=bs) {
670             bap[jj] = *value++;
671           }
672         }
673       } else {
674         for (ii=0; ii<bs; ii++,value+=stepval) {
675           for (jj=0; jj<bs; jj++) {
676             *bap++  = *value++;
677           }
678         }
679       }
680       noinsert2:;
681       low = i;
682     }
683     ailen[row] = nrow;
684   }
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
691 {
692   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
696   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
697   MatScalar      *aa = a->a,*ap;
698 
699   PetscFunctionBegin;
700   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
701 
702   if (m) rmax = ailen[0];
703   for (i=1; i<mbs; i++) {
704     /* move each row back by the amount of empty slots (fshift) before it*/
705     fshift += imax[i-1] - ailen[i-1];
706     rmax   = PetscMax(rmax,ailen[i]);
707     if (fshift) {
708       ip = aj + ai[i]; ap = aa + bs2*ai[i];
709       N = ailen[i];
710       for (j=0; j<N; j++) {
711         ip[j-fshift] = ip[j];
712         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
713       }
714     }
715     ai[i] = ai[i-1] + ailen[i-1];
716   }
717   if (mbs) {
718     fshift += imax[mbs-1] - ailen[mbs-1];
719     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
720   }
721   /* reset ilen and imax for each row */
722   for (i=0; i<mbs; i++) {
723     ailen[i] = imax[i] = ai[i+1] - ai[i];
724   }
725   a->nz = ai[mbs];
726 
727   /* diagonals may have moved, reset it */
728   if (a->diag) {
729     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
730   }
731   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
732            m,A->m,A->bs,fshift*bs2,a->nz*bs2);
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",
734            a->reallocs);
735   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
736   a->reallocs          = 0;
737   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
738 
739   PetscFunctionReturn(0);
740 }
741 
742 /*
743    This function returns an array of flags which indicate the locations of contiguous
744    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
745    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
746    Assume: sizes should be long enough to hold all the values.
747 */
748 #undef __FUNCT__
749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
750 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
751 {
752   PetscInt   i,j,k,row;
753   PetscTruth flg;
754 
755   PetscFunctionBegin;
756   for (i=0,j=0; i<n; j++) {
757     row = idx[i];
758     if (row%bs!=0) { /* Not the begining of a block */
759       sizes[j] = 1;
760       i++;
761     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
762       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
763       i++;
764     } else { /* Begining of the block, so check if the complete block exists */
765       flg = PETSC_TRUE;
766       for (k=1; k<bs; k++) {
767         if (row+k != idx[i+k]) { /* break in the block */
768           flg = PETSC_FALSE;
769           break;
770         }
771       }
772       if (flg == PETSC_TRUE) { /* No break in the bs */
773         sizes[j] = bs;
774         i+= bs;
775       } else {
776         sizes[j] = 1;
777         i++;
778       }
779     }
780   }
781   *bs_max = j;
782   PetscFunctionReturn(0);
783 }
784 
785 
786 /* Only add/insert a(i,j) with i<=j (blocks).
787    Any a(i,j) with i>j input by user is ingored.
788 */
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
792 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
793 {
794   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
795   PetscErrorCode ierr;
796   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
797   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
798   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
799   PetscInt       ridx,cidx,bs2=a->bs2;
800   MatScalar      *ap,value,*aa=a->a,*bap;
801 
802   PetscFunctionBegin;
803 
804   for (k=0; k<m; k++) { /* loop over added rows */
805     row  = im[k];       /* row number */
806     brow = row/bs;      /* block row number */
807     if (row < 0) continue;
808 #if defined(PETSC_USE_DEBUG)
809     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
810 #endif
811     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
812     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
813     rmax = imax[brow];  /* maximum space allocated for this row */
814     nrow = ailen[brow]; /* actual length of this row */
815     low  = 0;
816 
817     for (l=0; l<n; l++) { /* loop over added columns */
818       if (in[l] < 0) continue;
819 #if defined(PETSC_USE_DEBUG)
820       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
821 #endif
822       col = in[l];
823       bcol = col/bs;              /* block col number */
824 
825       if (brow <= bcol){
826         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
827         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
828           /* element value a(k,l) */
829           if (roworiented) {
830             value = v[l + k*n];
831           } else {
832             value = v[k + l*m];
833           }
834 
835           /* move pointer bap to a(k,l) quickly and add/insert value */
836           if (!sorted) low = 0; high = nrow;
837           while (high-low > 7) {
838             t = (low+high)/2;
839             if (rp[t] > bcol) high = t;
840             else              low  = t;
841           }
842           for (i=low; i<high; i++) {
843             /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */
844             if (rp[i] > bcol) break;
845             if (rp[i] == bcol) {
846               bap  = ap +  bs2*i + bs*cidx + ridx;
847               if (is == ADD_VALUES) *bap += value;
848               else                  *bap  = value;
849               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
850               if (brow == bcol && ridx < cidx){
851                 bap  = ap +  bs2*i + bs*ridx + cidx;
852                 if (is == ADD_VALUES) *bap += value;
853                 else                  *bap  = value;
854               }
855               goto noinsert1;
856             }
857           }
858 
859       if (nonew == 1) goto noinsert1;
860       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
861       if (nrow >= rmax) {
862         /* there is no extra room in row, therefore enlarge */
863         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
864         MatScalar *new_a;
865 
866         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
867 
868         /* Malloc new storage space */
869         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
870         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
871         new_j = (PetscInt*)(new_a + bs2*new_nz);
872         new_i = new_j + new_nz;
873 
874         /* copy over old data into new slots */
875         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
876         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
877         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
878         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
879         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
880         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
881         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
882         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
883         /* free up old matrix storage */
884         ierr = PetscFree(a->a);CHKERRQ(ierr);
885         if (!a->singlemalloc) {
886           ierr = PetscFree(a->i);CHKERRQ(ierr);
887           ierr = PetscFree(a->j);CHKERRQ(ierr);
888         }
889         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
890         a->singlemalloc = PETSC_TRUE;
891 
892         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
893         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
894         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
895         a->maxnz += bs2*CHUNKSIZE;
896         a->reallocs++;
897         a->nz++;
898       }
899 
900       N = nrow++ - 1;
901       /* shift up all the later entries in this row */
902       for (ii=N; ii>=i; ii--) {
903         rp[ii+1] = rp[ii];
904         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
905       }
906       if (N>=i) {
907         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
908       }
909       rp[i]                      = bcol;
910       ap[bs2*i + bs*cidx + ridx] = value;
911       noinsert1:;
912       low = i;
913       /* } */
914         }
915       } /* end of if .. if..  */
916     }                     /* end of loop over added columns */
917     ailen[brow] = nrow;
918   }                       /* end of loop over added rows */
919 
920   PetscFunctionReturn(0);
921 }
922 
923 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
924 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
925 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
926 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
927 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
928 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
929 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
930 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
931 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
932 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
933 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
934 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
935 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
936 
937 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
945 
946 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
947 
948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
956 
957 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
958 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
965 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
966 
967 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
968 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
975 
976 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
977 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
984 
985 #ifdef HAVE_MatICCFactor
986 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
987 #undef __FUNCT__
988 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
989 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
990 {
991   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
992   Mat            outA;
993   PetscErrorCode ierr;
994   PetscTruth     row_identity,col_identity;
995 
996   PetscFunctionBegin;
997   outA          = inA;
998   inA->factor   = FACTOR_CHOLESKY;
999 
1000   if (!a->diag) {
1001     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1002   }
1003   /*
1004       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1005       for ILU(0) factorization with natural ordering
1006   */
1007   switch (a->bs) {
1008   case 1:
1009     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1010     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1011     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1012     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1013   case 2:
1014     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1015     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1016     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1017     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1018     break;
1019   case 3:
1020     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1021     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1022     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1023     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1024     break;
1025   case 4:
1026     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1027     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1028     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1029     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1030     break;
1031   case 5:
1032     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1033     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1034     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1035     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1036     break;
1037   case 6:
1038     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1039     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1040     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1041     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1042     break;
1043   case 7:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1045     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1046     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1048     break;
1049   default:
1050     a->row        = row;
1051     a->icol       = col;
1052     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1053     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1054 
1055     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1056     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1057     PetscLogObjectParent(inA,a->icol);
1058 
1059     if (!a->solve_work) {
1060       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1061       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1062     }
1063   }
1064 
1065   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1066 
1067   PetscFunctionReturn(0);
1068 }
1069 #endif
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1073 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1074 {
1075   static PetscTruth called = PETSC_FALSE;
1076   MPI_Comm          comm = A->comm;
1077   PetscErrorCode    ierr;
1078 
1079   PetscFunctionBegin;
1080   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1081   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1082   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1089 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1090 {
1091   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1092   PetscInt     i,nz,n;
1093 
1094   PetscFunctionBegin;
1095   nz = baij->maxnz;
1096   n  = mat->n;
1097   for (i=0; i<nz; i++) {
1098     baij->j[i] = indices[i];
1099   }
1100   baij->nz = nz;
1101   for (i=0; i<n; i++) {
1102     baij->ilen[i] = baij->imax[i];
1103   }
1104 
1105   PetscFunctionReturn(0);
1106 }
1107 EXTERN_C_END
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1111 /*@
1112     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1113        in the matrix.
1114 
1115   Input Parameters:
1116 +  mat     - the SeqSBAIJ matrix
1117 -  indices - the column indices
1118 
1119   Level: advanced
1120 
1121   Notes:
1122     This can be called if you have precomputed the nonzero structure of the
1123   matrix and want to provide it to the matrix object to improve the performance
1124   of the MatSetValues() operation.
1125 
1126     You MUST have set the correct numbers of nonzeros per row in the call to
1127   MatCreateSeqSBAIJ().
1128 
1129     MUST be called before any calls to MatSetValues()
1130 
1131 .seealso: MatCreateSeqSBAIJ
1132 @*/
1133 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1134 {
1135   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1136 
1137   PetscFunctionBegin;
1138   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1139   PetscValidPointer(indices,2);
1140   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1141   if (f) {
1142     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1143   } else {
1144     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1151 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1152 {
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1162 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1163 {
1164   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1165   PetscFunctionBegin;
1166   *array = a->a;
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1172 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1173 {
1174   PetscFunctionBegin;
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #include "petscblaslapack.h"
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1181 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1182 {
1183   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1184   PetscErrorCode ierr;
1185   PetscInt       i,bs=Y->bs,bs2,j;
1186   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1187 
1188   PetscFunctionBegin;
1189   if (str == SAME_NONZERO_PATTERN) {
1190     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1191   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1192     if (y->xtoy && y->XtoY != X) {
1193       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1194       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1195     }
1196     if (!y->xtoy) { /* get xtoy */
1197       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1198       y->XtoY = X;
1199     }
1200     bs2 = bs*bs;
1201     for (i=0; i<x->nz; i++) {
1202       j = 0;
1203       while (j < bs2){
1204         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1205         j++;
1206       }
1207     }
1208     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1209   } else {
1210     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1211   }
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1217 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1218 {
1219   PetscFunctionBegin;
1220   *flg = PETSC_TRUE;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1226 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1227 {
1228   PetscFunctionBegin;
1229   *flg = PETSC_TRUE;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1235 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1236 {
1237   PetscFunctionBegin;
1238   *flg = PETSC_FALSE;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 /* -------------------------------------------------------------------*/
1243 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1244        MatGetRow_SeqSBAIJ,
1245        MatRestoreRow_SeqSBAIJ,
1246        MatMult_SeqSBAIJ_N,
1247 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1248        MatMult_SeqSBAIJ_N,
1249        MatMultAdd_SeqSBAIJ_N,
1250        MatSolve_SeqSBAIJ_N,
1251        0,
1252        0,
1253 /*10*/ 0,
1254        0,
1255        MatCholeskyFactor_SeqSBAIJ,
1256        MatRelax_SeqSBAIJ,
1257        MatTranspose_SeqSBAIJ,
1258 /*15*/ MatGetInfo_SeqSBAIJ,
1259        MatEqual_SeqSBAIJ,
1260        MatGetDiagonal_SeqSBAIJ,
1261        MatDiagonalScale_SeqSBAIJ,
1262        MatNorm_SeqSBAIJ,
1263 /*20*/ 0,
1264        MatAssemblyEnd_SeqSBAIJ,
1265        0,
1266        MatSetOption_SeqSBAIJ,
1267        MatZeroEntries_SeqSBAIJ,
1268 /*25*/ 0,
1269        0,
1270        0,
1271        MatCholeskyFactorSymbolic_SeqSBAIJ,
1272        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1273 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1274        0,
1275        MatICCFactorSymbolic_SeqSBAIJ,
1276        MatGetArray_SeqSBAIJ,
1277        MatRestoreArray_SeqSBAIJ,
1278 /*35*/ MatDuplicate_SeqSBAIJ,
1279        0,
1280        0,
1281        0,
1282        0,
1283 /*40*/ MatAXPY_SeqSBAIJ,
1284        MatGetSubMatrices_SeqSBAIJ,
1285        MatIncreaseOverlap_SeqSBAIJ,
1286        MatGetValues_SeqSBAIJ,
1287        0,
1288 /*45*/ MatPrintHelp_SeqSBAIJ,
1289        MatScale_SeqSBAIJ,
1290        0,
1291        0,
1292        0,
1293 /*50*/ 0,
1294        MatGetRowIJ_SeqSBAIJ,
1295        MatRestoreRowIJ_SeqSBAIJ,
1296        0,
1297        0,
1298 /*55*/ 0,
1299        0,
1300        0,
1301        0,
1302        MatSetValuesBlocked_SeqSBAIJ,
1303 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1304        0,
1305        0,
1306        MatGetPetscMaps_Petsc,
1307        0,
1308 /*65*/ 0,
1309        0,
1310        0,
1311        0,
1312        0,
1313 /*70*/ MatGetRowMax_SeqSBAIJ,
1314        0,
1315        0,
1316        0,
1317        0,
1318 /*75*/ 0,
1319        0,
1320        0,
1321        0,
1322        0,
1323 /*80*/ 0,
1324        0,
1325        0,
1326 #if !defined(PETSC_USE_COMPLEX)
1327        MatGetInertia_SeqSBAIJ,
1328 #else
1329        0,
1330 #endif
1331        MatLoad_SeqSBAIJ,
1332 /*85*/ MatIsSymmetric_SeqSBAIJ,
1333        MatIsHermitian_SeqSBAIJ,
1334        MatIsStructurallySymmetric_SeqSBAIJ,
1335        0,
1336        0,
1337 /*90*/ 0,
1338        0,
1339        0,
1340        0,
1341        0,
1342 /*95*/ 0,
1343        0,
1344        0,
1345        0};
1346 
1347 EXTERN_C_BEGIN
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1350 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1351 {
1352   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1353   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   if (aij->nonew != 1) {
1358     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1359   }
1360 
1361   /* allocate space for values if not already there */
1362   if (!aij->saved_values) {
1363     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1364   }
1365 
1366   /* copy values over */
1367   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 EXTERN_C_END
1371 
1372 EXTERN_C_BEGIN
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1375 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1376 {
1377   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1378   PetscErrorCode ierr;
1379   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1380 
1381   PetscFunctionBegin;
1382   if (aij->nonew != 1) {
1383     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1384   }
1385   if (!aij->saved_values) {
1386     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1387   }
1388 
1389   /* copy values over */
1390   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 EXTERN_C_END
1394 
1395 EXTERN_C_BEGIN
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1398 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1399 {
1400   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1401   PetscErrorCode ierr;
1402   PetscInt       i,len,mbs,bs2;
1403   PetscTruth     flg;
1404 
1405   PetscFunctionBegin;
1406   B->preallocated = PETSC_TRUE;
1407   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1408   mbs  = B->m/bs;
1409   bs2  = bs*bs;
1410 
1411   if (mbs*bs != B->m) {
1412     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1413   }
1414 
1415   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1416   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1417   if (nnz) {
1418     for (i=0; i<mbs; i++) {
1419       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1420       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1421     }
1422   }
1423 
1424   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1425   if (!flg) {
1426     switch (bs) {
1427     case 1:
1428       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1429       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1430       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1431       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1432       B->ops->mult            = MatMult_SeqSBAIJ_1;
1433       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1434       break;
1435     case 2:
1436       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1437       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1438       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1439       B->ops->mult            = MatMult_SeqSBAIJ_2;
1440       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1441       break;
1442     case 3:
1443       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1444       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1445       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1446       B->ops->mult            = MatMult_SeqSBAIJ_3;
1447       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1448       break;
1449     case 4:
1450       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1451       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1452       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1453       B->ops->mult            = MatMult_SeqSBAIJ_4;
1454       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1455       break;
1456     case 5:
1457       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1458       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1459       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1460       B->ops->mult            = MatMult_SeqSBAIJ_5;
1461       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1462       break;
1463     case 6:
1464       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1465       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1466       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1467       B->ops->mult            = MatMult_SeqSBAIJ_6;
1468       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1469       break;
1470     case 7:
1471       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1472       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1473       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1474       B->ops->mult            = MatMult_SeqSBAIJ_7;
1475       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1476       break;
1477     }
1478   }
1479 
1480   b->mbs = mbs;
1481   b->nbs = mbs;
1482   ierr   = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
1483   if (!nnz) {
1484     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1485     else if (nz <= 0)        nz = 1;
1486     for (i=0; i<mbs; i++) {
1487       b->imax[i] = nz;
1488     }
1489     nz = nz*mbs; /* total nz */
1490   } else {
1491     nz = 0;
1492     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1493   }
1494   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1495 
1496   /* allocate the matrix space */
1497   len  = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
1498   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1499   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1500   b->j = (PetscInt*)(b->a + nz*bs2);
1501   ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1502   b->i = b->j + nz;
1503   b->singlemalloc = PETSC_TRUE;
1504 
1505   /* pointer to beginning of each row */
1506   b->i[0] = 0;
1507   for (i=1; i<mbs+1; i++) {
1508     b->i[i] = b->i[i-1] + b->imax[i-1];
1509   }
1510 
1511   /* b->ilen will count nonzeros in each block row so far. */
1512   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
1513   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1514   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1515 
1516   B->bs               = bs;
1517   b->bs2              = bs2;
1518   b->nz             = 0;
1519   b->maxnz          = nz*bs2;
1520 
1521   b->inew             = 0;
1522   b->jnew             = 0;
1523   b->anew             = 0;
1524   b->a2anew           = 0;
1525   b->permute          = PETSC_FALSE;
1526   PetscFunctionReturn(0);
1527 }
1528 EXTERN_C_END
1529 
1530 EXTERN_C_BEGIN
1531 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1532 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1533 EXTERN_C_END
1534 
1535 /*MC
1536    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1537    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1538 
1539    Options Database Keys:
1540 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1541 
1542   Level: beginner
1543 
1544 .seealso: MatCreateSeqSBAIJ
1545 M*/
1546 
1547 EXTERN_C_BEGIN
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1550 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1551 {
1552   Mat_SeqSBAIJ   *b;
1553   PetscErrorCode ierr;
1554   PetscMPIInt    size;
1555 
1556   PetscFunctionBegin;
1557   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1558   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1559   B->m = B->M = PetscMax(B->m,B->M);
1560   B->n = B->N = PetscMax(B->n,B->N);
1561 
1562   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1563   B->data = (void*)b;
1564   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1565   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1566   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1567   B->ops->view        = MatView_SeqSBAIJ;
1568   B->factor           = 0;
1569   B->lupivotthreshold = 1.0;
1570   B->mapping          = 0;
1571   b->row              = 0;
1572   b->icol             = 0;
1573   b->reallocs         = 0;
1574   b->saved_values     = 0;
1575 
1576   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1577   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1578 
1579   b->sorted           = PETSC_FALSE;
1580   b->roworiented      = PETSC_TRUE;
1581   b->nonew            = 0;
1582   b->diag             = 0;
1583   b->solve_work       = 0;
1584   b->mult_work        = 0;
1585   B->spptr            = 0;
1586   b->keepzeroedrows   = PETSC_FALSE;
1587   b->xtoy             = 0;
1588   b->XtoY             = 0;
1589 
1590   b->inew             = 0;
1591   b->jnew             = 0;
1592   b->anew             = 0;
1593   b->a2anew           = 0;
1594   b->permute          = PETSC_FALSE;
1595 
1596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1597                                      "MatStoreValues_SeqSBAIJ",
1598                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1600                                      "MatRetrieveValues_SeqSBAIJ",
1601                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1603                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1604                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1606                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1607                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1609                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1610                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1612                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1613                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1614 
1615   B->symmetric                  = PETSC_TRUE;
1616   B->structurally_symmetric     = PETSC_TRUE;
1617   B->symmetric_set              = PETSC_TRUE;
1618   B->structurally_symmetric_set = PETSC_TRUE;
1619   PetscFunctionReturn(0);
1620 }
1621 EXTERN_C_END
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1625 /*@C
1626    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1627    compressed row) format.  For good matrix assembly performance the
1628    user should preallocate the matrix storage by setting the parameter nz
1629    (or the array nnz).  By setting these parameters accurately, performance
1630    during matrix assembly can be increased by more than a factor of 50.
1631 
1632    Collective on Mat
1633 
1634    Input Parameters:
1635 +  A - the symmetric matrix
1636 .  bs - size of block
1637 .  nz - number of block nonzeros per block row (same for all rows)
1638 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1639          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1640 
1641    Options Database Keys:
1642 .   -mat_no_unroll - uses code that does not unroll the loops in the
1643                      block calculations (much slower)
1644 .    -mat_block_size - size of the blocks to use
1645 
1646    Level: intermediate
1647 
1648    Notes:
1649    Specify the preallocated storage with either nz or nnz (not both).
1650    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1651    allocation.  For additional details, see the users manual chapter on
1652    matrices.
1653 
1654    If the nnz parameter is given then the nz parameter is ignored
1655 
1656 
1657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1658 @*/
1659 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1660 {
1661   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1662 
1663   PetscFunctionBegin;
1664   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1665   if (f) {
1666     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1667   }
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatCreateSeqSBAIJ"
1673 /*@C
1674    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1675    compressed row) format.  For good matrix assembly performance the
1676    user should preallocate the matrix storage by setting the parameter nz
1677    (or the array nnz).  By setting these parameters accurately, performance
1678    during matrix assembly can be increased by more than a factor of 50.
1679 
1680    Collective on MPI_Comm
1681 
1682    Input Parameters:
1683 +  comm - MPI communicator, set to PETSC_COMM_SELF
1684 .  bs - size of block
1685 .  m - number of rows, or number of columns
1686 .  nz - number of block nonzeros per block row (same for all rows)
1687 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1688          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1689 
1690    Output Parameter:
1691 .  A - the symmetric matrix
1692 
1693    Options Database Keys:
1694 .   -mat_no_unroll - uses code that does not unroll the loops in the
1695                      block calculations (much slower)
1696 .    -mat_block_size - size of the blocks to use
1697 
1698    Level: intermediate
1699 
1700    Notes:
1701 
1702    Specify the preallocated storage with either nz or nnz (not both).
1703    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1704    allocation.  For additional details, see the users manual chapter on
1705    matrices.
1706 
1707    If the nnz parameter is given then the nz parameter is ignored
1708 
1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1710 @*/
1711 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1712 {
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1717   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1718   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1724 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1725 {
1726   Mat            C;
1727   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1728   PetscErrorCode ierr;
1729   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1730 
1731   PetscFunctionBegin;
1732   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1733 
1734   *B = 0;
1735   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1736   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1737   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1738   c    = (Mat_SeqSBAIJ*)C->data;
1739 
1740   C->preallocated   = PETSC_TRUE;
1741   C->factor         = A->factor;
1742   c->row            = 0;
1743   c->icol           = 0;
1744   c->saved_values   = 0;
1745   c->keepzeroedrows = a->keepzeroedrows;
1746   C->assembled      = PETSC_TRUE;
1747 
1748   C->M    = A->M;
1749   C->N    = A->N;
1750   C->bs   = A->bs;
1751   c->bs2  = a->bs2;
1752   c->mbs  = a->mbs;
1753   c->nbs  = a->nbs;
1754 
1755   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
1756   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
1757   for (i=0; i<mbs; i++) {
1758     c->imax[i] = a->imax[i];
1759     c->ilen[i] = a->ilen[i];
1760   }
1761 
1762   /* allocate the matrix space */
1763   c->singlemalloc = PETSC_TRUE;
1764   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
1765   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1766   c->j = (PetscInt*)(c->a + nz*bs2);
1767   c->i = c->j + nz;
1768   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1769   if (mbs > 0) {
1770     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1771     if (cpvalues == MAT_COPY_VALUES) {
1772       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1773     } else {
1774       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1775     }
1776   }
1777 
1778   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1779   c->sorted      = a->sorted;
1780   c->roworiented = a->roworiented;
1781   c->nonew       = a->nonew;
1782 
1783   if (a->diag) {
1784     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1785     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
1786     for (i=0; i<mbs; i++) {
1787       c->diag[i] = a->diag[i];
1788     }
1789   } else c->diag        = 0;
1790   c->nz               = a->nz;
1791   c->maxnz            = a->maxnz;
1792   c->solve_work         = 0;
1793   c->mult_work          = 0;
1794   *B = C;
1795   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1801 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1802 {
1803   Mat_SeqSBAIJ   *a;
1804   Mat            B;
1805   PetscErrorCode ierr;
1806   int            fd;
1807   PetscMPIInt    size;
1808   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1809   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1810   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1811   PetscInt       *masked,nmask,tmp,bs2,ishift;
1812   PetscScalar    *aa;
1813   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1817   bs2  = bs*bs;
1818 
1819   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1820   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1821   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1822   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1823   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1824   M = header[1]; N = header[2]; nz = header[3];
1825 
1826   if (header[3] < 0) {
1827     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1828   }
1829 
1830   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1831 
1832   /*
1833      This code adds extra rows to make sure the number of rows is
1834     divisible by the blocksize
1835   */
1836   mbs        = M/bs;
1837   extra_rows = bs - M + bs*(mbs);
1838   if (extra_rows == bs) extra_rows = 0;
1839   else                  mbs++;
1840   if (extra_rows) {
1841     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1842   }
1843 
1844   /* read in row lengths */
1845   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1846   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1847   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1848 
1849   /* read in column indices */
1850   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1851   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1852   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1853 
1854   /* loop over row lengths determining block row lengths */
1855   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1856   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1857   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1858   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1859   masked   = mask + mbs;
1860   rowcount = 0; nzcount = 0;
1861   for (i=0; i<mbs; i++) {
1862     nmask = 0;
1863     for (j=0; j<bs; j++) {
1864       kmax = rowlengths[rowcount];
1865       for (k=0; k<kmax; k++) {
1866         tmp = jj[nzcount++]/bs;   /* block col. index */
1867         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1868       }
1869       rowcount++;
1870     }
1871     s_browlengths[i] += nmask;
1872 
1873     /* zero out the mask elements we set */
1874     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1875   }
1876 
1877   /* create our matrix */
1878   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1879   ierr = MatSetType(B,type);CHKERRQ(ierr);
1880   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1881   a = (Mat_SeqSBAIJ*)B->data;
1882 
1883   /* set matrix "i" values */
1884   a->i[0] = 0;
1885   for (i=1; i<= mbs; i++) {
1886     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1887     a->ilen[i-1] = s_browlengths[i-1];
1888   }
1889   a->nz = a->i[mbs];
1890 
1891   /* read in nonzero values */
1892   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1893   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1894   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1895 
1896   /* set "a" and "j" values into matrix */
1897   nzcount = 0; jcount = 0;
1898   for (i=0; i<mbs; i++) {
1899     nzcountb = nzcount;
1900     nmask    = 0;
1901     for (j=0; j<bs; j++) {
1902       kmax = rowlengths[i*bs+j];
1903       for (k=0; k<kmax; k++) {
1904         tmp = jj[nzcount++]/bs; /* block col. index */
1905         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1906       }
1907     }
1908     /* sort the masked values */
1909     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1910 
1911     /* set "j" values into matrix */
1912     maskcount = 1;
1913     for (j=0; j<nmask; j++) {
1914       a->j[jcount++]  = masked[j];
1915       mask[masked[j]] = maskcount++;
1916     }
1917 
1918     /* set "a" values into matrix */
1919     ishift = bs2*a->i[i];
1920     for (j=0; j<bs; j++) {
1921       kmax = rowlengths[i*bs+j];
1922       for (k=0; k<kmax; k++) {
1923         tmp       = jj[nzcountb]/bs ; /* block col. index */
1924         if (tmp >= i){
1925           block     = mask[tmp] - 1;
1926           point     = jj[nzcountb] - bs*tmp;
1927           idx       = ishift + bs2*block + j + bs*point;
1928           a->a[idx] = aa[nzcountb];
1929         }
1930         nzcountb++;
1931       }
1932     }
1933     /* zero out the mask elements we set */
1934     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1935   }
1936   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1937 
1938   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1939   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1940   ierr = PetscFree(aa);CHKERRQ(ierr);
1941   ierr = PetscFree(jj);CHKERRQ(ierr);
1942   ierr = PetscFree(mask);CHKERRQ(ierr);
1943 
1944   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1945   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1946   ierr = MatView_Private(B);CHKERRQ(ierr);
1947   *A = B;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1953 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1954 {
1955   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1956   MatScalar      *aa=a->a,*v,*v1;
1957   PetscScalar    *x,*b,*t,sum,d;
1958   PetscErrorCode ierr;
1959   PetscInt       m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
1960   PetscInt       nz,nz1,*vj,*vj1,i;
1961 
1962   PetscFunctionBegin;
1963   its = its*lits;
1964   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1965 
1966   if (bs > 1)
1967     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1968 
1969   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1970   if (xx != bb) {
1971     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1972   } else {
1973     b = x;
1974   }
1975 
1976   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1977 
1978   if (flag & SOR_ZERO_INITIAL_GUESS) {
1979     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1980       for (i=0; i<m; i++)
1981         t[i] = b[i];
1982 
1983       for (i=0; i<m; i++){
1984         d  = *(aa + ai[i]);  /* diag[i] */
1985         v  = aa + ai[i] + 1;
1986         vj = aj + ai[i] + 1;
1987         nz = ai[i+1] - ai[i] - 1;
1988         x[i] = omega*t[i]/d;
1989         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1990         PetscLogFlops(2*nz-1);
1991       }
1992     }
1993 
1994     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1995       for (i=0; i<m; i++)
1996         t[i] = b[i];
1997 
1998       for (i=0; i<m-1; i++){  /* update rhs */
1999         v  = aa + ai[i] + 1;
2000         vj = aj + ai[i] + 1;
2001         nz = ai[i+1] - ai[i] - 1;
2002         while (nz--) t[*vj++] -= x[i]*(*v++);
2003         PetscLogFlops(2*nz-1);
2004       }
2005       for (i=m-1; i>=0; i--){
2006         d  = *(aa + ai[i]);
2007         v  = aa + ai[i] + 1;
2008         vj = aj + ai[i] + 1;
2009         nz = ai[i+1] - ai[i] - 1;
2010         sum = t[i];
2011         while (nz--) sum -= x[*vj++]*(*v++);
2012         PetscLogFlops(2*nz-1);
2013         x[i] =   (1-omega)*x[i] + omega*sum/d;
2014       }
2015     }
2016     its--;
2017   }
2018 
2019   while (its--) {
2020     /*
2021        forward sweep:
2022        for i=0,...,m-1:
2023          sum[i] = (b[i] - U(i,:)x )/d[i];
2024          x[i]   = (1-omega)x[i] + omega*sum[i];
2025          b      = b - x[i]*U^T(i,:);
2026 
2027     */
2028     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2029       for (i=0; i<m; i++)
2030         t[i] = b[i];
2031 
2032       for (i=0; i<m; i++){
2033         d  = *(aa + ai[i]);  /* diag[i] */
2034         v  = aa + ai[i] + 1; v1=v;
2035         vj = aj + ai[i] + 1; vj1=vj;
2036         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2037         sum = t[i];
2038         while (nz1--) sum -= (*v1++)*x[*vj1++];
2039         x[i] = (1-omega)*x[i] + omega*sum/d;
2040         while (nz--) t[*vj++] -= x[i]*(*v++);
2041         PetscLogFlops(4*nz-2);
2042       }
2043     }
2044 
2045   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2046       /*
2047        backward sweep:
2048        b = b - x[i]*U^T(i,:), i=0,...,n-2
2049        for i=m-1,...,0:
2050          sum[i] = (b[i] - U(i,:)x )/d[i];
2051          x[i]   = (1-omega)x[i] + omega*sum[i];
2052       */
2053       for (i=0; i<m; i++)
2054         t[i] = b[i];
2055 
2056       for (i=0; i<m-1; i++){  /* update rhs */
2057         v  = aa + ai[i] + 1;
2058         vj = aj + ai[i] + 1;
2059         nz = ai[i+1] - ai[i] - 1;
2060         while (nz--) t[*vj++] -= x[i]*(*v++);
2061         PetscLogFlops(2*nz-1);
2062       }
2063       for (i=m-1; i>=0; i--){
2064         d  = *(aa + ai[i]);
2065         v  = aa + ai[i] + 1;
2066         vj = aj + ai[i] + 1;
2067         nz = ai[i+1] - ai[i] - 1;
2068         sum = t[i];
2069         while (nz--) sum -= x[*vj++]*(*v++);
2070         PetscLogFlops(2*nz-1);
2071         x[i] =   (1-omega)*x[i] + omega*sum/d;
2072       }
2073     }
2074   }
2075 
2076   ierr = PetscFree(t);CHKERRQ(ierr);
2077   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2078   if (bb != xx) {
2079     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2080   }
2081 
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 
2086 
2087 
2088 
2089 
2090