xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision b2da817d11b8f98ec4a15fcfa65f2fa8cfedc826)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 int MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
21   int         *diag,*jj = a->j,i,ierr;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 int MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   int          i,mbs = a->mbs,ierr;
38 
39   PetscFunctionBegin;
40   if (a->diag) PetscFunctionReturn(0);
41 
42   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
43   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
44   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
50 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
51 {
52   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
53   int         n = a->mbs,i;
54 
55   PetscFunctionBegin;
56   *nn = n;
57   if (!ia) PetscFunctionReturn(0);
58 
59   if (oshift == 1) {
60     /* temporarily add 1 to i and j indices */
61     int nz = a->i[n];
62     for (i=0; i<nz; i++) a->j[i]++;
63     for (i=0; i<n+1; i++) a->i[i]++;
64     *ia = a->i; *ja = a->j;
65   } else {
66     *ia = a->i; *ja = a->j;
67   }
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
73 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
74 {
75   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
76   int         i,n = a->mbs;
77 
78   PetscFunctionBegin;
79   if (!ia) PetscFunctionReturn(0);
80 
81   if (oshift == 1) {
82     int nz = a->i[n]-1;
83     for (i=0; i<nz; i++) a->j[i]--;
84     for (i=0; i<n+1; i++) a->i[i]--;
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
91 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
92 {
93   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
94 
95   PetscFunctionBegin;
96   *bs = sbaij->bs;
97   PetscFunctionReturn(0);
98 }
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
102 int MatDestroy_SeqSBAIJ(Mat A)
103 {
104   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
105   int         ierr;
106 
107   PetscFunctionBegin;
108 #if defined(PETSC_USE_LOG)
109   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
110 #endif
111   ierr = PetscFree(a->a);CHKERRQ(ierr);
112   if (!a->singlemalloc) {
113     ierr = PetscFree(a->i);CHKERRQ(ierr);
114     ierr = PetscFree(a->j);CHKERRQ(ierr);
115   }
116   if (a->row) {
117     ierr = ISDestroy(a->row);CHKERRQ(ierr);
118   }
119   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
120   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
121   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
122   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
123   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
124   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
125   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
126   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
127   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
128 
129   if (a->inew){
130     ierr = PetscFree(a->inew);CHKERRQ(ierr);
131     a->inew = 0;
132   }
133   ierr = PetscFree(a);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
139 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
140 {
141   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
142 
143   PetscFunctionBegin;
144   switch (op) {
145   case MAT_ROW_ORIENTED:
146     a->roworiented    = PETSC_TRUE;
147     break;
148   case MAT_COLUMN_ORIENTED:
149     a->roworiented    = PETSC_FALSE;
150     break;
151   case MAT_COLUMNS_SORTED:
152     a->sorted         = PETSC_TRUE;
153     break;
154   case MAT_COLUMNS_UNSORTED:
155     a->sorted         = PETSC_FALSE;
156     break;
157   case MAT_KEEP_ZEROED_ROWS:
158     a->keepzeroedrows = PETSC_TRUE;
159     break;
160   case MAT_NO_NEW_NONZERO_LOCATIONS:
161     a->nonew          = 1;
162     break;
163   case MAT_NEW_NONZERO_LOCATION_ERR:
164     a->nonew          = -1;
165     break;
166   case MAT_NEW_NONZERO_ALLOCATION_ERR:
167     a->nonew          = -2;
168     break;
169   case MAT_YES_NEW_NONZERO_LOCATIONS:
170     a->nonew          = 0;
171     break;
172   case MAT_ROWS_SORTED:
173   case MAT_ROWS_UNSORTED:
174   case MAT_YES_NEW_DIAGONALS:
175   case MAT_IGNORE_OFF_PROC_ENTRIES:
176   case MAT_USE_HASH_TABLE:
177     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
178     break;
179   case MAT_NO_NEW_DIAGONALS:
180     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
181   case MAT_NOT_SYMMETRIC:
182   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
183   case MAT_HERMITIAN:
184     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
185   case MAT_SYMMETRIC:
186   case MAT_STRUCTURALLY_SYMMETRIC:
187   case MAT_NOT_HERMITIAN:
188   case MAT_SYMMETRY_ETERNAL:
189   case MAT_NOT_SYMMETRY_ETERNAL:
190     break;
191   default:
192     SETERRQ(PETSC_ERR_SUP,"unknown option");
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 #undef __FUNCT__
198 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
199 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
200 {
201   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
202   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
203   MatScalar    *aa,*aa_i;
204   PetscScalar  *v_i;
205 
206   PetscFunctionBegin;
207   bs  = a->bs;
208   ai  = a->i;
209   aj  = a->j;
210   aa  = a->a;
211   bs2 = a->bs2;
212 
213   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
214 
215   bn  = row/bs;   /* Block number */
216   bp  = row % bs; /* Block position */
217   M   = ai[bn+1] - ai[bn];
218   *ncols = bs*M;
219 
220   if (v) {
221     *v = 0;
222     if (*ncols) {
223       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
224       for (i=0; i<M; i++) { /* for each block in the block row */
225         v_i  = *v + i*bs;
226         aa_i = aa + bs2*(ai[bn] + i);
227         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
228       }
229     }
230   }
231 
232   if (cols) {
233     *cols = 0;
234     if (*ncols) {
235       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
236       for (i=0; i<M; i++) { /* for each block in the block row */
237         cols_i = *cols + i*bs;
238         itmp  = bs*aj[ai[bn] + i];
239         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
240       }
241     }
242   }
243 
244   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
245   /* this segment is currently removed, so only entries in the upper triangle are obtained */
246 #ifdef column_search
247   v_i    = *v    + M*bs;
248   cols_i = *cols + M*bs;
249   for (i=0; i<bn; i++){ /* for each block row */
250     M = ai[i+1] - ai[i];
251     for (j=0; j<M; j++){
252       itmp = aj[ai[i] + j];    /* block column value */
253       if (itmp == bn){
254         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
255         for (k=0; k<bs; k++) {
256           *cols_i++ = i*bs+k;
257           *v_i++    = aa_i[k];
258         }
259         *ncols += bs;
260         break;
261       }
262     }
263   }
264 #endif
265 
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
271 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
272 {
273   int ierr;
274 
275   PetscFunctionBegin;
276   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
277   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
283 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
284 {
285   int ierr;
286   PetscFunctionBegin;
287   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
293 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
294 {
295   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
296   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
297   char              *name;
298   PetscViewerFormat format;
299 
300   PetscFunctionBegin;
301   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
302   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
303   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
304     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
305   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
306     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
307   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
308     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
309     for (i=0; i<a->mbs; i++) {
310       for (j=0; j<bs; j++) {
311         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
312         for (k=a->i[i]; k<a->i[i+1]; k++) {
313           for (l=0; l<bs; l++) {
314 #if defined(PETSC_USE_COMPLEX)
315             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
316               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
317                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
318             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
320                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
321             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
322               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
323             }
324 #else
325             if (a->a[bs2*k + l*bs + j] != 0.0) {
326               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
327             }
328 #endif
329           }
330         }
331         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
332       }
333     }
334     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
335   } else {
336     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
337     for (i=0; i<a->mbs; i++) {
338       for (j=0; j<bs; j++) {
339         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
340         for (k=a->i[i]; k<a->i[i+1]; k++) {
341           for (l=0; l<bs; l++) {
342 #if defined(PETSC_USE_COMPLEX)
343             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
344               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
345                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
347               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
348                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             } else {
350               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
351             }
352 #else
353             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
354 #endif
355           }
356         }
357         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
358       }
359     }
360     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
361   }
362   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
368 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
369 {
370   Mat          A = (Mat) Aa;
371   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
372   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
373   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
374   MatScalar    *aa;
375   MPI_Comm     comm;
376   PetscViewer  viewer;
377 
378   PetscFunctionBegin;
379   /*
380       This is nasty. If this is called from an originally parallel matrix
381    then all processes call this,but only the first has the matrix so the
382    rest should return immediately.
383   */
384   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
385   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
386   if (rank) PetscFunctionReturn(0);
387 
388   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
389 
390   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
391   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
392 
393   /* loop over matrix elements drawing boxes */
394   color = PETSC_DRAW_BLUE;
395   for (i=0,row=0; i<mbs; i++,row+=bs) {
396     for (j=a->i[i]; j<a->i[i+1]; j++) {
397       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
398       x_l = a->j[j]*bs; x_r = x_l + 1.0;
399       aa = a->a + j*bs2;
400       for (k=0; k<bs; k++) {
401         for (l=0; l<bs; l++) {
402           if (PetscRealPart(*aa++) >=  0.) continue;
403           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
404         }
405       }
406     }
407   }
408   color = PETSC_DRAW_CYAN;
409   for (i=0,row=0; i<mbs; i++,row+=bs) {
410     for (j=a->i[i]; j<a->i[i+1]; j++) {
411       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
412       x_l = a->j[j]*bs; x_r = x_l + 1.0;
413       aa = a->a + j*bs2;
414       for (k=0; k<bs; k++) {
415         for (l=0; l<bs; l++) {
416           if (PetscRealPart(*aa++) != 0.) continue;
417           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
418         }
419       }
420     }
421   }
422 
423   color = PETSC_DRAW_RED;
424   for (i=0,row=0; i<mbs; i++,row+=bs) {
425     for (j=a->i[i]; j<a->i[i+1]; j++) {
426       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
427       x_l = a->j[j]*bs; x_r = x_l + 1.0;
428       aa = a->a + j*bs2;
429       for (k=0; k<bs; k++) {
430         for (l=0; l<bs; l++) {
431           if (PetscRealPart(*aa++) <= 0.) continue;
432           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
433         }
434       }
435     }
436   }
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
442 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
443 {
444   int          ierr;
445   PetscReal    xl,yl,xr,yr,w,h;
446   PetscDraw    draw;
447   PetscTruth   isnull;
448 
449   PetscFunctionBegin;
450 
451   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
452   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
453 
454   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
455   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
456   xr += w;    yr += h;  xl = -w;     yl = -h;
457   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
458   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
459   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatView_SeqSBAIJ"
465 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
466 {
467   int        ierr;
468   PetscTruth isascii,isdraw;
469 
470   PetscFunctionBegin;
471   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
472   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
473   if (isascii){
474     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
475   } else if (isdraw) {
476     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
477   } else {
478     Mat B;
479     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
480     ierr = MatView(B,viewer);CHKERRQ(ierr);
481     ierr = MatDestroy(B);CHKERRQ(ierr);
482   }
483   PetscFunctionReturn(0);
484 }
485 
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
489 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
490 {
491   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
492   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
493   int        *ai = a->i,*ailen = a->ilen;
494   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
495   MatScalar  *ap,*aa = a->a,zero = 0.0;
496 
497   PetscFunctionBegin;
498   for (k=0; k<m; k++) { /* loop over rows */
499     row  = im[k]; brow = row/bs;
500     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
501     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
502     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
503     nrow = ailen[brow];
504     for (l=0; l<n; l++) { /* loop over columns */
505       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
506       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
507       col  = in[l] ;
508       bcol = col/bs;
509       cidx = col%bs;
510       ridx = row%bs;
511       high = nrow;
512       low  = 0; /* assume unsorted */
513       while (high-low > 5) {
514         t = (low+high)/2;
515         if (rp[t] > bcol) high = t;
516         else             low  = t;
517       }
518       for (i=low; i<high; i++) {
519         if (rp[i] > bcol) break;
520         if (rp[i] == bcol) {
521           *v++ = ap[bs2*i+bs*cidx+ridx];
522           goto finished;
523         }
524       }
525       *v++ = zero;
526       finished:;
527     }
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
535 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
536 {
537   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
538   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
539   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
540   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
541   PetscTruth      roworiented=a->roworiented;
542   const MatScalar *value = v;
543   MatScalar       *ap,*aa = a->a,*bap;
544 
545   PetscFunctionBegin;
546   if (roworiented) {
547     stepval = (n-1)*bs;
548   } else {
549     stepval = (m-1)*bs;
550   }
551   for (k=0; k<m; k++) { /* loop over added rows */
552     row  = im[k];
553     if (row < 0) continue;
554 #if defined(PETSC_USE_BOPT_g)
555     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
556 #endif
557     rp   = aj + ai[row];
558     ap   = aa + bs2*ai[row];
559     rmax = imax[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       if (in[l] < 0) continue;
564       col = in[l];
565 #if defined(PETSC_USE_BOPT_g)
566       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
567 #endif
568       if (col < row) continue; /* ignore lower triangular block */
569       if (roworiented) {
570         value = v + k*(stepval+bs)*bs + l*bs;
571       } else {
572         value = v + l*(stepval+bs)*bs + k*bs;
573       }
574       if (!sorted) low = 0; high = nrow;
575       while (high-low > 7) {
576         t = (low+high)/2;
577         if (rp[t] > col) high = t;
578         else             low  = t;
579       }
580       for (i=low; i<high; i++) {
581         if (rp[i] > col) break;
582         if (rp[i] == col) {
583           bap  = ap +  bs2*i;
584           if (roworiented) {
585             if (is == ADD_VALUES) {
586               for (ii=0; ii<bs; ii++,value+=stepval) {
587                 for (jj=ii; jj<bs2; jj+=bs) {
588                   bap[jj] += *value++;
589                 }
590               }
591             } else {
592               for (ii=0; ii<bs; ii++,value+=stepval) {
593                 for (jj=ii; jj<bs2; jj+=bs) {
594                   bap[jj] = *value++;
595                 }
596               }
597             }
598           } else {
599             if (is == ADD_VALUES) {
600               for (ii=0; ii<bs; ii++,value+=stepval) {
601                 for (jj=0; jj<bs; jj++) {
602                   *bap++ += *value++;
603                 }
604               }
605             } else {
606               for (ii=0; ii<bs; ii++,value+=stepval) {
607                 for (jj=0; jj<bs; jj++) {
608                   *bap++  = *value++;
609                 }
610               }
611             }
612           }
613           goto noinsert2;
614         }
615       }
616       if (nonew == 1) goto noinsert2;
617       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
618       if (nrow >= rmax) {
619         /* there is no extra room in row, therefore enlarge */
620         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
621         MatScalar *new_a;
622 
623         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
624 
625         /* malloc new storage space */
626         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
627 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
628         new_j   = (int*)(new_a + bs2*new_nz);
629         new_i   = new_j + new_nz;
630 
631         /* copy over old data into new slots */
632         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
633         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
634         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
635         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
636         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
637         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
638         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
640         /* free up old matrix storage */
641         ierr = PetscFree(a->a);CHKERRQ(ierr);
642         if (!a->singlemalloc) {
643           ierr = PetscFree(a->i);CHKERRQ(ierr);
644           ierr = PetscFree(a->j);CHKERRQ(ierr);
645         }
646         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
647         a->singlemalloc = PETSC_TRUE;
648 
649         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
650         rmax = imax[row] = imax[row] + CHUNKSIZE;
651         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
652         a->maxnz += bs2*CHUNKSIZE;
653         a->reallocs++;
654         a->nz++;
655       }
656       N = nrow++ - 1;
657       /* shift up all the later entries in this row */
658       for (ii=N; ii>=i; ii--) {
659         rp[ii+1] = rp[ii];
660         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
661       }
662       if (N >= i) {
663         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
664       }
665       rp[i] = col;
666       bap   = ap +  bs2*i;
667       if (roworiented) {
668         for (ii=0; ii<bs; ii++,value+=stepval) {
669           for (jj=ii; jj<bs2; jj+=bs) {
670             bap[jj] = *value++;
671           }
672         }
673       } else {
674         for (ii=0; ii<bs; ii++,value+=stepval) {
675           for (jj=0; jj<bs; jj++) {
676             *bap++  = *value++;
677           }
678         }
679       }
680       noinsert2:;
681       low = i;
682     }
683     ailen[row] = nrow;
684   }
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
690 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
691 {
692   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
693   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
694   int        m = A->m,*ip,N,*ailen = a->ilen;
695   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
696   MatScalar  *aa = a->a,*ap;
697 
698   PetscFunctionBegin;
699   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
700 
701   if (m) rmax = ailen[0];
702   for (i=1; i<mbs; i++) {
703     /* move each row back by the amount of empty slots (fshift) before it*/
704     fshift += imax[i-1] - ailen[i-1];
705     rmax   = PetscMax(rmax,ailen[i]);
706     if (fshift) {
707       ip = aj + ai[i]; ap = aa + bs2*ai[i];
708       N = ailen[i];
709       for (j=0; j<N; j++) {
710         ip[j-fshift] = ip[j];
711         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
712       }
713     }
714     ai[i] = ai[i-1] + ailen[i-1];
715   }
716   if (mbs) {
717     fshift += imax[mbs-1] - ailen[mbs-1];
718     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
719   }
720   /* reset ilen and imax for each row */
721   for (i=0; i<mbs; i++) {
722     ailen[i] = imax[i] = ai[i+1] - ai[i];
723   }
724   a->nz = ai[mbs];
725 
726   /* diagonals may have moved, reset it */
727   if (a->diag) {
728     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
729   }
730   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
731            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
732   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
733            a->reallocs);
734   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
735   a->reallocs          = 0;
736   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
737 
738   PetscFunctionReturn(0);
739 }
740 
741 /*
742    This function returns an array of flags which indicate the locations of contiguous
743    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
744    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
745    Assume: sizes should be long enough to hold all the values.
746 */
747 #undef __FUNCT__
748 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
749 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
750 {
751   int        i,j,k,row;
752   PetscTruth flg;
753 
754   PetscFunctionBegin;
755   for (i=0,j=0; i<n; j++) {
756     row = idx[i];
757     if (row%bs!=0) { /* Not the begining of a block */
758       sizes[j] = 1;
759       i++;
760     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
761       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
762       i++;
763     } else { /* Begining of the block, so check if the complete block exists */
764       flg = PETSC_TRUE;
765       for (k=1; k<bs; k++) {
766         if (row+k != idx[i+k]) { /* break in the block */
767           flg = PETSC_FALSE;
768           break;
769         }
770       }
771       if (flg == PETSC_TRUE) { /* No break in the bs */
772         sizes[j] = bs;
773         i+= bs;
774       } else {
775         sizes[j] = 1;
776         i++;
777       }
778     }
779   }
780   *bs_max = j;
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
786 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
787 {
788   PetscFunctionBegin;
789   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
790 }
791 
792 /* Only add/insert a(i,j) with i<=j (blocks).
793    Any a(i,j) with i>j input by user is ingored.
794 */
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
798 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
799 {
800   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
801   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
802   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
803   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
804   int         ridx,cidx,bs2=a->bs2,ierr;
805   MatScalar   *ap,value,*aa=a->a,*bap;
806 
807   PetscFunctionBegin;
808 
809   for (k=0; k<m; k++) { /* loop over added rows */
810     row  = im[k];       /* row number */
811     brow = row/bs;      /* block row number */
812     if (row < 0) continue;
813 #if defined(PETSC_USE_BOPT_g)
814     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
815 #endif
816     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
817     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
818     rmax = imax[brow];  /* maximum space allocated for this row */
819     nrow = ailen[brow]; /* actual length of this row */
820     low  = 0;
821 
822     for (l=0; l<n; l++) { /* loop over added columns */
823       if (in[l] < 0) continue;
824 #if defined(PETSC_USE_BOPT_g)
825       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
826 #endif
827       col = in[l];
828       bcol = col/bs;              /* block col number */
829 
830       if (brow <= bcol){
831         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
832         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
833           /* element value a(k,l) */
834           if (roworiented) {
835             value = v[l + k*n];
836           } else {
837             value = v[k + l*m];
838           }
839 
840           /* move pointer bap to a(k,l) quickly and add/insert value */
841           if (!sorted) low = 0; high = nrow;
842           while (high-low > 7) {
843             t = (low+high)/2;
844             if (rp[t] > bcol) high = t;
845             else              low  = t;
846           }
847           for (i=low; i<high; i++) {
848             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
849             if (rp[i] > bcol) break;
850             if (rp[i] == bcol) {
851               bap  = ap +  bs2*i + bs*cidx + ridx;
852               if (is == ADD_VALUES) *bap += value;
853               else                  *bap  = value;
854               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
855               if (brow == bcol && ridx < cidx){
856                 bap  = ap +  bs2*i + bs*ridx + cidx;
857                 if (is == ADD_VALUES) *bap += value;
858                 else                  *bap  = value;
859               }
860               goto noinsert1;
861             }
862           }
863 
864       if (nonew == 1) goto noinsert1;
865       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
866       if (nrow >= rmax) {
867         /* there is no extra room in row, therefore enlarge */
868         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
869         MatScalar *new_a;
870 
871         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
872 
873         /* Malloc new storage space */
874         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
875         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
876         new_j = (int*)(new_a + bs2*new_nz);
877         new_i = new_j + new_nz;
878 
879         /* copy over old data into new slots */
880         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
881         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
882         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
883         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
884         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
885         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
886         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
887         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
888         /* free up old matrix storage */
889         ierr = PetscFree(a->a);CHKERRQ(ierr);
890         if (!a->singlemalloc) {
891           ierr = PetscFree(a->i);CHKERRQ(ierr);
892           ierr = PetscFree(a->j);CHKERRQ(ierr);
893         }
894         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
895         a->singlemalloc = PETSC_TRUE;
896 
897         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
898         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
899         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
900         a->maxnz += bs2*CHUNKSIZE;
901         a->reallocs++;
902         a->nz++;
903       }
904 
905       N = nrow++ - 1;
906       /* shift up all the later entries in this row */
907       for (ii=N; ii>=i; ii--) {
908         rp[ii+1] = rp[ii];
909         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
910       }
911       if (N>=i) {
912         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
913       }
914       rp[i]                      = bcol;
915       ap[bs2*i + bs*cidx + ridx] = value;
916       noinsert1:;
917       low = i;
918       /* } */
919         }
920       } /* end of if .. if..  */
921     }                     /* end of loop over added columns */
922     ailen[brow] = nrow;
923   }                       /* end of loop over added rows */
924 
925   PetscFunctionReturn(0);
926 }
927 
928 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
929 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
930 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
931 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
932 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
933 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
934 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
935 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
936 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
937 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
938 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
939 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
940 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
941 extern int MatZeroEntries_SeqSBAIJ(Mat);
942 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
943 
944 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
945 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
951 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
952 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
953 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
954 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
955 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
956 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
957 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
958 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
959 
960 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
961 
962 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
963 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
964 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
965 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
966 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
967 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
968 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
969 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
970 
971 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
979 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
980 
981 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
982 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
983 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
984 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
985 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
986 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
987 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
988 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
989 
990 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
991 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
992 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
993 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
994 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
995 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
996 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
997 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
998 
999 #ifdef HAVE_MatICCFactor
1000 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1003 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1004 {
1005   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1006   Mat         outA;
1007   int         ierr;
1008   PetscTruth  row_identity,col_identity;
1009 
1010   PetscFunctionBegin;
1011   /*
1012   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1013   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1014   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1015   if (!row_identity || !col_identity) {
1016     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1017   }
1018   */
1019 
1020   outA          = inA;
1021   inA->factor   = FACTOR_CHOLESKY;
1022 
1023   if (!a->diag) {
1024     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1025   }
1026   /*
1027       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1028       for ILU(0) factorization with natural ordering
1029   */
1030   switch (a->bs) {
1031   case 1:
1032     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1033     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1034     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1035     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1036   case 2:
1037     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1038     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1039     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1040     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1041     break;
1042   case 3:
1043     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1044     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1045     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1046     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1047     break;
1048   case 4:
1049     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1050     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1051     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1052     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1053     break;
1054   case 5:
1055     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1056     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1057     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1058     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1059     break;
1060   case 6:
1061     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1062     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1063     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1064     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1065     break;
1066   case 7:
1067     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1068     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1069     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1070     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1071     break;
1072   default:
1073     a->row        = row;
1074     a->icol       = col;
1075     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1076     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1077 
1078     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1079     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1080     PetscLogObjectParent(inA,a->icol);
1081 
1082     if (!a->solve_work) {
1083       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1084       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1085     }
1086   }
1087 
1088   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1089 
1090   PetscFunctionReturn(0);
1091 }
1092 #endif
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1096 int MatPrintHelp_SeqSBAIJ(Mat A)
1097 {
1098   static PetscTruth called = PETSC_FALSE;
1099   MPI_Comm          comm = A->comm;
1100   int               ierr;
1101 
1102   PetscFunctionBegin;
1103   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1104   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1105   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 EXTERN_C_BEGIN
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1112 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1113 {
1114   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1115   int         i,nz,n;
1116 
1117   PetscFunctionBegin;
1118   nz = baij->maxnz;
1119   n  = mat->n;
1120   for (i=0; i<nz; i++) {
1121     baij->j[i] = indices[i];
1122   }
1123   baij->nz = nz;
1124   for (i=0; i<n; i++) {
1125     baij->ilen[i] = baij->imax[i];
1126   }
1127 
1128   PetscFunctionReturn(0);
1129 }
1130 EXTERN_C_END
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1134 /*@
1135     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1136        in the matrix.
1137 
1138   Input Parameters:
1139 +  mat     - the SeqSBAIJ matrix
1140 -  indices - the column indices
1141 
1142   Level: advanced
1143 
1144   Notes:
1145     This can be called if you have precomputed the nonzero structure of the
1146   matrix and want to provide it to the matrix object to improve the performance
1147   of the MatSetValues() operation.
1148 
1149     You MUST have set the correct numbers of nonzeros per row in the call to
1150   MatCreateSeqSBAIJ().
1151 
1152     MUST be called before any calls to MatSetValues()
1153 
1154 .seealso: MatCreateSeqSBAIJ
1155 @*/
1156 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1157 {
1158   int ierr,(*f)(Mat,int *);
1159 
1160   PetscFunctionBegin;
1161   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1162   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1163   if (f) {
1164     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1165   } else {
1166     SETERRQ(1,"Wrong type of matrix to set column indices");
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1173 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1174 {
1175   int        ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1184 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1185 {
1186   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1187   PetscFunctionBegin;
1188   *array = a->a;
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1194 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1195 {
1196   PetscFunctionBegin;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #include "petscblaslapack.h"
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1203 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1204 {
1205   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1206   int          ierr,one=1,i,bs=y->bs,bs2,j;
1207 
1208   PetscFunctionBegin;
1209   if (str == SAME_NONZERO_PATTERN) {
1210     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1211   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1212     if (y->xtoy && y->XtoY != X) {
1213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1214       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1215     }
1216     if (!y->xtoy) { /* get xtoy */
1217       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1218       y->XtoY = X;
1219     }
1220     bs2 = bs*bs;
1221     for (i=0; i<x->nz; i++) {
1222       j = 0;
1223       while (j < bs2){
1224         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1225         j++;
1226       }
1227     }
1228     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1229   } else {
1230     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1237 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1238 {
1239   PetscFunctionBegin;
1240   *flg = PETSC_TRUE;
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1246 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1247 {
1248   PetscFunctionBegin;
1249   *flg = PETSC_TRUE;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1255 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1256 {
1257   PetscFunctionBegin;
1258   *flg = PETSC_FALSE;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 /* -------------------------------------------------------------------*/
1263 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1264        MatGetRow_SeqSBAIJ,
1265        MatRestoreRow_SeqSBAIJ,
1266        MatMult_SeqSBAIJ_N,
1267 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1268        MatMultTranspose_SeqSBAIJ,
1269        MatMultTransposeAdd_SeqSBAIJ,
1270        MatSolve_SeqSBAIJ_N,
1271        0,
1272        0,
1273 /*10*/ 0,
1274        0,
1275        MatCholeskyFactor_SeqSBAIJ,
1276        MatRelax_SeqSBAIJ,
1277        MatTranspose_SeqSBAIJ,
1278 /*15*/ MatGetInfo_SeqSBAIJ,
1279        MatEqual_SeqSBAIJ,
1280        MatGetDiagonal_SeqSBAIJ,
1281        MatDiagonalScale_SeqSBAIJ,
1282        MatNorm_SeqSBAIJ,
1283 /*20*/ 0,
1284        MatAssemblyEnd_SeqSBAIJ,
1285        0,
1286        MatSetOption_SeqSBAIJ,
1287        MatZeroEntries_SeqSBAIJ,
1288 /*25*/ MatZeroRows_SeqSBAIJ,
1289        0,
1290        0,
1291        MatCholeskyFactorSymbolic_SeqSBAIJ,
1292        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1293 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1294        0,
1295        MatICCFactorSymbolic_SeqSBAIJ,
1296        MatGetArray_SeqSBAIJ,
1297        MatRestoreArray_SeqSBAIJ,
1298 /*35*/ MatDuplicate_SeqSBAIJ,
1299        0,
1300        0,
1301        0,
1302        0,
1303 /*40*/ MatAXPY_SeqSBAIJ,
1304        MatGetSubMatrices_SeqSBAIJ,
1305        MatIncreaseOverlap_SeqSBAIJ,
1306        MatGetValues_SeqSBAIJ,
1307        0,
1308 /*45*/ MatPrintHelp_SeqSBAIJ,
1309        MatScale_SeqSBAIJ,
1310        0,
1311        0,
1312        0,
1313 /*50*/ MatGetBlockSize_SeqSBAIJ,
1314        MatGetRowIJ_SeqSBAIJ,
1315        MatRestoreRowIJ_SeqSBAIJ,
1316        0,
1317        0,
1318 /*55*/ 0,
1319        0,
1320        0,
1321        0,
1322        MatSetValuesBlocked_SeqSBAIJ,
1323 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1324        0,
1325        0,
1326        MatGetPetscMaps_Petsc,
1327        0,
1328 /*65*/ 0,
1329        0,
1330        0,
1331        0,
1332        0,
1333 /*70*/ MatGetRowMax_SeqSBAIJ,
1334        0,
1335        0,
1336        0,
1337        0,
1338 /*75*/ 0,
1339        0,
1340        0,
1341        0,
1342        0,
1343 /*80*/ 0,
1344        0,
1345        0,
1346 #if !defined(PETSC_USE_COMPLEX)
1347        MatGetInertia_SeqSBAIJ,
1348 #else
1349        0,
1350 #endif
1351 /*85*/ MatLoad_SeqSBAIJ,
1352        MatIsSymmetric_SeqSBAIJ,
1353        MatIsStructurallySymmetric_SeqSBAIJ,
1354        MatIsHermitian_SeqSBAIJ
1355 };
1356 
1357 EXTERN_C_BEGIN
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1360 int MatStoreValues_SeqSBAIJ(Mat mat)
1361 {
1362   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1363   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1364   int         ierr;
1365 
1366   PetscFunctionBegin;
1367   if (aij->nonew != 1) {
1368     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1369   }
1370 
1371   /* allocate space for values if not already there */
1372   if (!aij->saved_values) {
1373     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1374   }
1375 
1376   /* copy values over */
1377   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 EXTERN_C_END
1381 
1382 EXTERN_C_BEGIN
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1385 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1386 {
1387   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1388   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1389 
1390   PetscFunctionBegin;
1391   if (aij->nonew != 1) {
1392     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1393   }
1394   if (!aij->saved_values) {
1395     SETERRQ(1,"Must call MatStoreValues(A);first");
1396   }
1397 
1398   /* copy values over */
1399   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 EXTERN_C_END
1403 
1404 EXTERN_C_BEGIN
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1407 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1408 {
1409   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1410   int          i,len,ierr,mbs,bs2;
1411   PetscTruth   flg;
1412 
1413   PetscFunctionBegin;
1414   B->preallocated = PETSC_TRUE;
1415   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1416   mbs  = B->m/bs;
1417   bs2  = bs*bs;
1418 
1419   if (mbs*bs != B->m) {
1420     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1421   }
1422 
1423   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1424   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1425   if (nnz) {
1426     for (i=0; i<mbs; i++) {
1427       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1428       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1429     }
1430   }
1431 
1432   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1433   if (!flg) {
1434     switch (bs) {
1435     case 1:
1436       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1437       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1438       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1439       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1440       B->ops->mult            = MatMult_SeqSBAIJ_1;
1441       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1442       break;
1443     case 2:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1445       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1446       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1447       B->ops->mult            = MatMult_SeqSBAIJ_2;
1448       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1449       break;
1450     case 3:
1451       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1452       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1453       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1454       B->ops->mult            = MatMult_SeqSBAIJ_3;
1455       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1456       break;
1457     case 4:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1459       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1460       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1461       B->ops->mult            = MatMult_SeqSBAIJ_4;
1462       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1463       break;
1464     case 5:
1465       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1466       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1467       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1468       B->ops->mult            = MatMult_SeqSBAIJ_5;
1469       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1470       break;
1471     case 6:
1472       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1473       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1474       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1475       B->ops->mult            = MatMult_SeqSBAIJ_6;
1476       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1477       break;
1478     case 7:
1479       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1480       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1481       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1482       B->ops->mult            = MatMult_SeqSBAIJ_7;
1483       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1484       break;
1485     }
1486   }
1487 
1488   b->mbs = mbs;
1489   b->nbs = mbs;
1490   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1491   if (!nnz) {
1492     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1493     else if (nz <= 0)        nz = 1;
1494     for (i=0; i<mbs; i++) {
1495       b->imax[i] = nz;
1496     }
1497     nz = nz*mbs; /* total nz */
1498   } else {
1499     nz = 0;
1500     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1501   }
1502   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1503 
1504   /* allocate the matrix space */
1505   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1506   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1507   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1508   b->j = (int*)(b->a + nz*bs2);
1509   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1510   b->i = b->j + nz;
1511   b->singlemalloc = PETSC_TRUE;
1512 
1513   /* pointer to beginning of each row */
1514   b->i[0] = 0;
1515   for (i=1; i<mbs+1; i++) {
1516     b->i[i] = b->i[i-1] + b->imax[i-1];
1517   }
1518 
1519   /* b->ilen will count nonzeros in each block row so far. */
1520   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1521   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1522   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1523 
1524   b->bs               = bs;
1525   b->bs2              = bs2;
1526   b->nz             = 0;
1527   b->maxnz          = nz*bs2;
1528 
1529   b->inew             = 0;
1530   b->jnew             = 0;
1531   b->anew             = 0;
1532   b->a2anew           = 0;
1533   b->permute          = PETSC_FALSE;
1534   PetscFunctionReturn(0);
1535 }
1536 EXTERN_C_END
1537 
1538 EXTERN_C_BEGIN
1539 EXTERN int MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1540 EXTERN int MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1541 EXTERN_C_END
1542 
1543 /*MC
1544    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1545    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1546 
1547    Options Database Keys:
1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1549 
1550   Level: beginner
1551 
1552 .seealso: MatCreateSeqSBAIJ
1553 M*/
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1558 int MatCreate_SeqSBAIJ(Mat B)
1559 {
1560   Mat_SeqSBAIJ *b;
1561   int          ierr,size;
1562 
1563   PetscFunctionBegin;
1564   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1565   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1566   B->m = B->M = PetscMax(B->m,B->M);
1567   B->n = B->N = PetscMax(B->n,B->N);
1568 
1569   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1570   B->data = (void*)b;
1571   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1572   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1573   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1574   B->ops->view        = MatView_SeqSBAIJ;
1575   B->factor           = 0;
1576   B->lupivotthreshold = 1.0;
1577   B->mapping          = 0;
1578   b->row              = 0;
1579   b->icol             = 0;
1580   b->reallocs         = 0;
1581   b->saved_values     = 0;
1582 
1583   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1584   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1585 
1586   b->sorted           = PETSC_FALSE;
1587   b->roworiented      = PETSC_TRUE;
1588   b->nonew            = 0;
1589   b->diag             = 0;
1590   b->solve_work       = 0;
1591   b->mult_work        = 0;
1592   B->spptr            = 0;
1593   b->keepzeroedrows   = PETSC_FALSE;
1594   b->xtoy             = 0;
1595   b->XtoY             = 0;
1596 
1597   b->inew             = 0;
1598   b->jnew             = 0;
1599   b->anew             = 0;
1600   b->a2anew           = 0;
1601   b->permute          = PETSC_FALSE;
1602 
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604                                      "MatStoreValues_SeqSBAIJ",
1605                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607                                      "MatRetrieveValues_SeqSBAIJ",
1608                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1610                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1611                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1613                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1614                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1616                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1617                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1619                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1620                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 EXTERN_C_END
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1627 /*@C
1628    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1629    compressed row) format.  For good matrix assembly performance the
1630    user should preallocate the matrix storage by setting the parameter nz
1631    (or the array nnz).  By setting these parameters accurately, performance
1632    during matrix assembly can be increased by more than a factor of 50.
1633 
1634    Collective on Mat
1635 
1636    Input Parameters:
1637 +  A - the symmetric matrix
1638 .  bs - size of block
1639 .  nz - number of block nonzeros per block row (same for all rows)
1640 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1641          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1642 
1643    Options Database Keys:
1644 .   -mat_no_unroll - uses code that does not unroll the loops in the
1645                      block calculations (much slower)
1646 .    -mat_block_size - size of the blocks to use
1647 
1648    Level: intermediate
1649 
1650    Notes:
1651    Specify the preallocated storage with either nz or nnz (not both).
1652    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1653    allocation.  For additional details, see the users manual chapter on
1654    matrices.
1655 
1656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1657 @*/
1658 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1659   int ierr,(*f)(Mat,int,int,const int[]);
1660 
1661   PetscFunctionBegin;
1662   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1663   if (f) {
1664     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1665   }
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatCreateSeqSBAIJ"
1671 /*@C
1672    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1673    compressed row) format.  For good matrix assembly performance the
1674    user should preallocate the matrix storage by setting the parameter nz
1675    (or the array nnz).  By setting these parameters accurately, performance
1676    during matrix assembly can be increased by more than a factor of 50.
1677 
1678    Collective on MPI_Comm
1679 
1680    Input Parameters:
1681 +  comm - MPI communicator, set to PETSC_COMM_SELF
1682 .  bs - size of block
1683 .  m - number of rows, or number of columns
1684 .  nz - number of block nonzeros per block row (same for all rows)
1685 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1686          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1687 
1688    Output Parameter:
1689 .  A - the symmetric matrix
1690 
1691    Options Database Keys:
1692 .   -mat_no_unroll - uses code that does not unroll the loops in the
1693                      block calculations (much slower)
1694 .    -mat_block_size - size of the blocks to use
1695 
1696    Level: intermediate
1697 
1698    Notes:
1699 
1700    Specify the preallocated storage with either nz or nnz (not both).
1701    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1702    allocation.  For additional details, see the users manual chapter on
1703    matrices.
1704 
1705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1706 @*/
1707 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1708 {
1709   int ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1713   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1714   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1720 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1721 {
1722   Mat          C;
1723   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1724   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1725 
1726   PetscFunctionBegin;
1727   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1728 
1729   *B = 0;
1730   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1731   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1732   c    = (Mat_SeqSBAIJ*)C->data;
1733 
1734   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1735   C->preallocated   = PETSC_TRUE;
1736   C->factor         = A->factor;
1737   c->row            = 0;
1738   c->icol           = 0;
1739   c->saved_values   = 0;
1740   c->keepzeroedrows = a->keepzeroedrows;
1741   C->assembled      = PETSC_TRUE;
1742 
1743   C->M    = A->M;
1744   C->N    = A->N;
1745   c->bs   = a->bs;
1746   c->bs2  = a->bs2;
1747   c->mbs  = a->mbs;
1748   c->nbs  = a->nbs;
1749 
1750   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1751   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1752   for (i=0; i<mbs; i++) {
1753     c->imax[i] = a->imax[i];
1754     c->ilen[i] = a->ilen[i];
1755   }
1756 
1757   /* allocate the matrix space */
1758   c->singlemalloc = PETSC_TRUE;
1759   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1760   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1761   c->j = (int*)(c->a + nz*bs2);
1762   c->i = c->j + nz;
1763   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1764   if (mbs > 0) {
1765     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1766     if (cpvalues == MAT_COPY_VALUES) {
1767       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1768     } else {
1769       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1770     }
1771   }
1772 
1773   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1774   c->sorted      = a->sorted;
1775   c->roworiented = a->roworiented;
1776   c->nonew       = a->nonew;
1777 
1778   if (a->diag) {
1779     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1780     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1781     for (i=0; i<mbs; i++) {
1782       c->diag[i] = a->diag[i];
1783     }
1784   } else c->diag        = 0;
1785   c->nz               = a->nz;
1786   c->maxnz            = a->maxnz;
1787   c->solve_work         = 0;
1788   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1789   c->mult_work          = 0;
1790   *B = C;
1791   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1797 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1798 {
1799   Mat_SeqSBAIJ *a;
1800   Mat          B;
1801   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1802   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1803   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1804   int          *masked,nmask,tmp,bs2,ishift;
1805   PetscScalar  *aa;
1806   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1810   bs2  = bs*bs;
1811 
1812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1813   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1814   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1815   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1816   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1817   M = header[1]; N = header[2]; nz = header[3];
1818 
1819   if (header[3] < 0) {
1820     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1821   }
1822 
1823   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1824 
1825   /*
1826      This code adds extra rows to make sure the number of rows is
1827     divisible by the blocksize
1828   */
1829   mbs        = M/bs;
1830   extra_rows = bs - M + bs*(mbs);
1831   if (extra_rows == bs) extra_rows = 0;
1832   else                  mbs++;
1833   if (extra_rows) {
1834     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1835   }
1836 
1837   /* read in row lengths */
1838   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1839   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1840   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1841 
1842   /* read in column indices */
1843   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1844   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1845   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1846 
1847   /* loop over row lengths determining block row lengths */
1848   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1849   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1850   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1851   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1852   masked   = mask + mbs;
1853   rowcount = 0; nzcount = 0;
1854   for (i=0; i<mbs; i++) {
1855     nmask = 0;
1856     for (j=0; j<bs; j++) {
1857       kmax = rowlengths[rowcount];
1858       for (k=0; k<kmax; k++) {
1859         tmp = jj[nzcount++]/bs;   /* block col. index */
1860         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1861       }
1862       rowcount++;
1863     }
1864     s_browlengths[i] += nmask;
1865 
1866     /* zero out the mask elements we set */
1867     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1868   }
1869 
1870   /* create our matrix */
1871   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1872   ierr = MatSetType(B,type);CHKERRQ(ierr);
1873   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1874   a = (Mat_SeqSBAIJ*)B->data;
1875 
1876   /* set matrix "i" values */
1877   a->i[0] = 0;
1878   for (i=1; i<= mbs; i++) {
1879     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1880     a->ilen[i-1] = s_browlengths[i-1];
1881   }
1882   a->nz = a->i[mbs];
1883 
1884   /* read in nonzero values */
1885   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1886   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1887   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1888 
1889   /* set "a" and "j" values into matrix */
1890   nzcount = 0; jcount = 0;
1891   for (i=0; i<mbs; i++) {
1892     nzcountb = nzcount;
1893     nmask    = 0;
1894     for (j=0; j<bs; j++) {
1895       kmax = rowlengths[i*bs+j];
1896       for (k=0; k<kmax; k++) {
1897         tmp = jj[nzcount++]/bs; /* block col. index */
1898         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1899       }
1900     }
1901     /* sort the masked values */
1902     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1903 
1904     /* set "j" values into matrix */
1905     maskcount = 1;
1906     for (j=0; j<nmask; j++) {
1907       a->j[jcount++]  = masked[j];
1908       mask[masked[j]] = maskcount++;
1909     }
1910 
1911     /* set "a" values into matrix */
1912     ishift = bs2*a->i[i];
1913     for (j=0; j<bs; j++) {
1914       kmax = rowlengths[i*bs+j];
1915       for (k=0; k<kmax; k++) {
1916         tmp       = jj[nzcountb]/bs ; /* block col. index */
1917         if (tmp >= i){
1918           block     = mask[tmp] - 1;
1919           point     = jj[nzcountb] - bs*tmp;
1920           idx       = ishift + bs2*block + j + bs*point;
1921           a->a[idx] = aa[nzcountb];
1922         }
1923         nzcountb++;
1924       }
1925     }
1926     /* zero out the mask elements we set */
1927     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1928   }
1929   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1930 
1931   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1932   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1933   ierr = PetscFree(aa);CHKERRQ(ierr);
1934   ierr = PetscFree(jj);CHKERRQ(ierr);
1935   ierr = PetscFree(mask);CHKERRQ(ierr);
1936 
1937   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1939   ierr = MatView_Private(B);CHKERRQ(ierr);
1940   *A = B;
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1946 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1947 {
1948   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1949   MatScalar    *aa=a->a,*v,*v1;
1950   PetscScalar  *x,*b,*t,sum,d;
1951   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1952   int          nz,nz1,*vj,*vj1,i;
1953 
1954   PetscFunctionBegin;
1955   its = its*lits;
1956   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1957 
1958   if (bs > 1)
1959     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1960 
1961   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1962   if (xx != bb) {
1963     ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
1964   } else {
1965     b = x;
1966   }
1967 
1968   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1969 
1970   if (flag & SOR_ZERO_INITIAL_GUESS) {
1971     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1972       for (i=0; i<m; i++)
1973         t[i] = b[i];
1974 
1975       for (i=0; i<m; i++){
1976         d  = *(aa + ai[i]);  /* diag[i] */
1977         v  = aa + ai[i] + 1;
1978         vj = aj + ai[i] + 1;
1979         nz = ai[i+1] - ai[i] - 1;
1980         x[i] = omega*t[i]/d;
1981         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1982         PetscLogFlops(2*nz-1);
1983       }
1984     }
1985 
1986     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1987       for (i=0; i<m; i++)
1988         t[i] = b[i];
1989 
1990       for (i=0; i<m-1; i++){  /* update rhs */
1991         v  = aa + ai[i] + 1;
1992         vj = aj + ai[i] + 1;
1993         nz = ai[i+1] - ai[i] - 1;
1994         while (nz--) t[*vj++] -= x[i]*(*v++);
1995         PetscLogFlops(2*nz-1);
1996       }
1997       for (i=m-1; i>=0; i--){
1998         d  = *(aa + ai[i]);
1999         v  = aa + ai[i] + 1;
2000         vj = aj + ai[i] + 1;
2001         nz = ai[i+1] - ai[i] - 1;
2002         sum = t[i];
2003         while (nz--) sum -= x[*vj++]*(*v++);
2004         PetscLogFlops(2*nz-1);
2005         x[i] =   (1-omega)*x[i] + omega*sum/d;
2006       }
2007     }
2008     its--;
2009   }
2010 
2011   while (its--) {
2012     /*
2013        forward sweep:
2014        for i=0,...,m-1:
2015          sum[i] = (b[i] - U(i,:)x )/d[i];
2016          x[i]   = (1-omega)x[i] + omega*sum[i];
2017          b      = b - x[i]*U^T(i,:);
2018 
2019     */
2020     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2021       for (i=0; i<m; i++)
2022         t[i] = b[i];
2023 
2024       for (i=0; i<m; i++){
2025         d  = *(aa + ai[i]);  /* diag[i] */
2026         v  = aa + ai[i] + 1; v1=v;
2027         vj = aj + ai[i] + 1; vj1=vj;
2028         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2029         sum = t[i];
2030         while (nz1--) sum -= (*v1++)*x[*vj1++];
2031         x[i] = (1-omega)*x[i] + omega*sum/d;
2032         while (nz--) t[*vj++] -= x[i]*(*v++);
2033         PetscLogFlops(4*nz-2);
2034       }
2035     }
2036 
2037   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2038       /*
2039        backward sweep:
2040        b = b - x[i]*U^T(i,:), i=0,...,n-2
2041        for i=m-1,...,0:
2042          sum[i] = (b[i] - U(i,:)x )/d[i];
2043          x[i]   = (1-omega)x[i] + omega*sum[i];
2044       */
2045       for (i=0; i<m; i++)
2046         t[i] = b[i];
2047 
2048       for (i=0; i<m-1; i++){  /* update rhs */
2049         v  = aa + ai[i] + 1;
2050         vj = aj + ai[i] + 1;
2051         nz = ai[i+1] - ai[i] - 1;
2052         while (nz--) t[*vj++] -= x[i]*(*v++);
2053         PetscLogFlops(2*nz-1);
2054       }
2055       for (i=m-1; i>=0; i--){
2056         d  = *(aa + ai[i]);
2057         v  = aa + ai[i] + 1;
2058         vj = aj + ai[i] + 1;
2059         nz = ai[i+1] - ai[i] - 1;
2060         sum = t[i];
2061         while (nz--) sum -= x[*vj++]*(*v++);
2062         PetscLogFlops(2*nz-1);
2063         x[i] =   (1-omega)*x[i] + omega*sum/d;
2064       }
2065     }
2066   }
2067 
2068   ierr = PetscFree(t); CHKERRQ(ierr);
2069   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2070   if (bb != xx) {
2071     ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
2072   }
2073 
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 
2078 
2079 
2080 
2081 
2082