xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision b6b3a6b24117a697d33d4b93e2ced21238e64e70)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
294 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
297   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
298   char              *name;
299   PetscViewerFormat format;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
303   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
304   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
305     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
306   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
307     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
308   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
309     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
310     for (i=0; i<a->mbs; i++) {
311       for (j=0; j<bs; j++) {
312         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
313         for (k=a->i[i]; k<a->i[i+1]; k++) {
314           for (l=0; l<bs; l++) {
315 #if defined(PETSC_USE_COMPLEX)
316             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
317               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
318                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
319             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
320               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
321                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
322             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             }
325 #else
326             if (a->a[bs2*k + l*bs + j] != 0.0) {
327               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
328             }
329 #endif
330           }
331         }
332         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
333       }
334     }
335     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
336   } else {
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
338     for (i=0; i<a->mbs; i++) {
339       for (j=0; j<bs; j++) {
340         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
341         for (k=a->i[i]; k<a->i[i+1]; k++) {
342           for (l=0; l<bs; l++) {
343 #if defined(PETSC_USE_COMPLEX)
344             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
346                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
348               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
349                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
350             } else {
351               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             }
353 #else
354             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
355 #endif
356           }
357         }
358         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
359       }
360     }
361     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
362   }
363   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
364   PetscFunctionReturn(0);
365 }
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
369 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
370 {
371   Mat          A = (Mat) Aa;
372   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
373   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
374   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
375   MatScalar    *aa;
376   MPI_Comm     comm;
377   PetscViewer  viewer;
378 
379   PetscFunctionBegin;
380   /*
381       This is nasty. If this is called from an originally parallel matrix
382    then all processes call this,but only the first has the matrix so the
383    rest should return immediately.
384   */
385   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
386   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
387   if (rank) PetscFunctionReturn(0);
388 
389   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
390 
391   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
392   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
393 
394   /* loop over matrix elements drawing boxes */
395   color = PETSC_DRAW_BLUE;
396   for (i=0,row=0; i<mbs; i++,row+=bs) {
397     for (j=a->i[i]; j<a->i[i+1]; j++) {
398       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
399       x_l = a->j[j]*bs; x_r = x_l + 1.0;
400       aa = a->a + j*bs2;
401       for (k=0; k<bs; k++) {
402         for (l=0; l<bs; l++) {
403           if (PetscRealPart(*aa++) >=  0.) continue;
404           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
405         }
406       }
407     }
408   }
409   color = PETSC_DRAW_CYAN;
410   for (i=0,row=0; i<mbs; i++,row+=bs) {
411     for (j=a->i[i]; j<a->i[i+1]; j++) {
412       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
413       x_l = a->j[j]*bs; x_r = x_l + 1.0;
414       aa = a->a + j*bs2;
415       for (k=0; k<bs; k++) {
416         for (l=0; l<bs; l++) {
417           if (PetscRealPart(*aa++) != 0.) continue;
418           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
419         }
420       }
421     }
422   }
423 
424   color = PETSC_DRAW_RED;
425   for (i=0,row=0; i<mbs; i++,row+=bs) {
426     for (j=a->i[i]; j<a->i[i+1]; j++) {
427       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
428       x_l = a->j[j]*bs; x_r = x_l + 1.0;
429       aa = a->a + j*bs2;
430       for (k=0; k<bs; k++) {
431         for (l=0; l<bs; l++) {
432           if (PetscRealPart(*aa++) <= 0.) continue;
433           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
434         }
435       }
436     }
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
443 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
444 {
445   int          ierr;
446   PetscReal    xl,yl,xr,yr,w,h;
447   PetscDraw    draw;
448   PetscTruth   isnull;
449 
450   PetscFunctionBegin;
451 
452   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
453   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
454 
455   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
456   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
457   xr += w;    yr += h;  xl = -w;     yl = -h;
458   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
459   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
460   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatView_SeqSBAIJ"
466 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
467 {
468   int        ierr;
469   PetscTruth isascii,isdraw;
470 
471   PetscFunctionBegin;
472   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
473   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
474   if (isascii){
475     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
476   } else if (isdraw) {
477     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
478   } else {
479     Mat B;
480     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
481     ierr = MatView(B,viewer);CHKERRQ(ierr);
482     ierr = MatDestroy(B);CHKERRQ(ierr);
483   }
484   PetscFunctionReturn(0);
485 }
486 
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
490 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
491 {
492   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
493   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
494   int        *ai = a->i,*ailen = a->ilen;
495   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
496   MatScalar  *ap,*aa = a->a,zero = 0.0;
497 
498   PetscFunctionBegin;
499   for (k=0; k<m; k++) { /* loop over rows */
500     row  = im[k]; brow = row/bs;
501     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
502     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
503     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
504     nrow = ailen[brow];
505     for (l=0; l<n; l++) { /* loop over columns */
506       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
507       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
508       col  = in[l] ;
509       bcol = col/bs;
510       cidx = col%bs;
511       ridx = row%bs;
512       high = nrow;
513       low  = 0; /* assume unsorted */
514       while (high-low > 5) {
515         t = (low+high)/2;
516         if (rp[t] > bcol) high = t;
517         else             low  = t;
518       }
519       for (i=low; i<high; i++) {
520         if (rp[i] > bcol) break;
521         if (rp[i] == bcol) {
522           *v++ = ap[bs2*i+bs*cidx+ridx];
523           goto finished;
524         }
525       }
526       *v++ = zero;
527       finished:;
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
536 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
537 {
538   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
539   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
540   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
541   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
542   PetscTruth      roworiented=a->roworiented;
543   const MatScalar *value = v;
544   MatScalar       *ap,*aa = a->a,*bap;
545 
546   PetscFunctionBegin;
547   if (roworiented) {
548     stepval = (n-1)*bs;
549   } else {
550     stepval = (m-1)*bs;
551   }
552   for (k=0; k<m; k++) { /* loop over added rows */
553     row  = im[k];
554     if (row < 0) continue;
555 #if defined(PETSC_USE_BOPT_g)
556     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
557 #endif
558     rp   = aj + ai[row];
559     ap   = aa + bs2*ai[row];
560     rmax = imax[row];
561     nrow = ailen[row];
562     low  = 0;
563     for (l=0; l<n; l++) { /* loop over added columns */
564       if (in[l] < 0) continue;
565       col = in[l];
566 #if defined(PETSC_USE_BOPT_g)
567       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
568 #endif
569       if (col < row) continue; /* ignore lower triangular block */
570       if (roworiented) {
571         value = v + k*(stepval+bs)*bs + l*bs;
572       } else {
573         value = v + l*(stepval+bs)*bs + k*bs;
574       }
575       if (!sorted) low = 0; high = nrow;
576       while (high-low > 7) {
577         t = (low+high)/2;
578         if (rp[t] > col) high = t;
579         else             low  = t;
580       }
581       for (i=low; i<high; i++) {
582         if (rp[i] > col) break;
583         if (rp[i] == col) {
584           bap  = ap +  bs2*i;
585           if (roworiented) {
586             if (is == ADD_VALUES) {
587               for (ii=0; ii<bs; ii++,value+=stepval) {
588                 for (jj=ii; jj<bs2; jj+=bs) {
589                   bap[jj] += *value++;
590                 }
591               }
592             } else {
593               for (ii=0; ii<bs; ii++,value+=stepval) {
594                 for (jj=ii; jj<bs2; jj+=bs) {
595                   bap[jj] = *value++;
596                 }
597               }
598             }
599           } else {
600             if (is == ADD_VALUES) {
601               for (ii=0; ii<bs; ii++,value+=stepval) {
602                 for (jj=0; jj<bs; jj++) {
603                   *bap++ += *value++;
604                 }
605               }
606             } else {
607               for (ii=0; ii<bs; ii++,value+=stepval) {
608                 for (jj=0; jj<bs; jj++) {
609                   *bap++  = *value++;
610                 }
611               }
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       if (nonew == 1) goto noinsert2;
618       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
619       if (nrow >= rmax) {
620         /* there is no extra room in row, therefore enlarge */
621         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
622         MatScalar *new_a;
623 
624         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
625 
626         /* malloc new storage space */
627         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
628 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
629         new_j   = (int*)(new_a + bs2*new_nz);
630         new_i   = new_j + new_nz;
631 
632         /* copy over old data into new slots */
633         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
634         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
635         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
636         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
637         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
638         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
639         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
640         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
641         /* free up old matrix storage */
642         ierr = PetscFree(a->a);CHKERRQ(ierr);
643         if (!a->singlemalloc) {
644           ierr = PetscFree(a->i);CHKERRQ(ierr);
645           ierr = PetscFree(a->j);CHKERRQ(ierr);
646         }
647         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
648         a->singlemalloc = PETSC_TRUE;
649 
650         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
651         rmax = imax[row] = imax[row] + CHUNKSIZE;
652         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
653         a->maxnz += bs2*CHUNKSIZE;
654         a->reallocs++;
655         a->nz++;
656       }
657       N = nrow++ - 1;
658       /* shift up all the later entries in this row */
659       for (ii=N; ii>=i; ii--) {
660         rp[ii+1] = rp[ii];
661         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
662       }
663       if (N >= i) {
664         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
665       }
666       rp[i] = col;
667       bap   = ap +  bs2*i;
668       if (roworiented) {
669         for (ii=0; ii<bs; ii++,value+=stepval) {
670           for (jj=ii; jj<bs2; jj+=bs) {
671             bap[jj] = *value++;
672           }
673         }
674       } else {
675         for (ii=0; ii<bs; ii++,value+=stepval) {
676           for (jj=0; jj<bs; jj++) {
677             *bap++  = *value++;
678           }
679         }
680       }
681       noinsert2:;
682       low = i;
683     }
684     ailen[row] = nrow;
685   }
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
691 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
692 {
693   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
694   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695   int        m = A->m,*ip,N,*ailen = a->ilen;
696   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
697   MatScalar  *aa = a->a,*ap;
698 
699   PetscFunctionBegin;
700   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
701 
702   if (m) rmax = ailen[0];
703   for (i=1; i<mbs; i++) {
704     /* move each row back by the amount of empty slots (fshift) before it*/
705     fshift += imax[i-1] - ailen[i-1];
706     rmax   = PetscMax(rmax,ailen[i]);
707     if (fshift) {
708       ip = aj + ai[i]; ap = aa + bs2*ai[i];
709       N = ailen[i];
710       for (j=0; j<N; j++) {
711         ip[j-fshift] = ip[j];
712         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
713       }
714     }
715     ai[i] = ai[i-1] + ailen[i-1];
716   }
717   if (mbs) {
718     fshift += imax[mbs-1] - ailen[mbs-1];
719     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
720   }
721   /* reset ilen and imax for each row */
722   for (i=0; i<mbs; i++) {
723     ailen[i] = imax[i] = ai[i+1] - ai[i];
724   }
725   a->nz = ai[mbs];
726 
727   /* diagonals may have moved, reset it */
728   if (a->diag) {
729     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
730   }
731   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
732            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
734            a->reallocs);
735   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
736   a->reallocs          = 0;
737   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
738 
739   PetscFunctionReturn(0);
740 }
741 
742 /*
743    This function returns an array of flags which indicate the locations of contiguous
744    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
745    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
746    Assume: sizes should be long enough to hold all the values.
747 */
748 #undef __FUNCT__
749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
750 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
751 {
752   int        i,j,k,row;
753   PetscTruth flg;
754 
755   PetscFunctionBegin;
756   for (i=0,j=0; i<n; j++) {
757     row = idx[i];
758     if (row%bs!=0) { /* Not the begining of a block */
759       sizes[j] = 1;
760       i++;
761     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
762       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
763       i++;
764     } else { /* Begining of the block, so check if the complete block exists */
765       flg = PETSC_TRUE;
766       for (k=1; k<bs; k++) {
767         if (row+k != idx[i+k]) { /* break in the block */
768           flg = PETSC_FALSE;
769           break;
770         }
771       }
772       if (flg == PETSC_TRUE) { /* No break in the bs */
773         sizes[j] = bs;
774         i+= bs;
775       } else {
776         sizes[j] = 1;
777         i++;
778       }
779     }
780   }
781   *bs_max = j;
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
787 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
788 {
789   PetscFunctionBegin;
790   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
791 }
792 
793 /* Only add/insert a(i,j) with i<=j (blocks).
794    Any a(i,j) with i>j input by user is ingored.
795 */
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
799 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
800 {
801   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
802   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
803   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
804   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
805   int         ridx,cidx,bs2=a->bs2,ierr;
806   MatScalar   *ap,value,*aa=a->a,*bap;
807 
808   PetscFunctionBegin;
809 
810   for (k=0; k<m; k++) { /* loop over added rows */
811     row  = im[k];       /* row number */
812     brow = row/bs;      /* block row number */
813     if (row < 0) continue;
814 #if defined(PETSC_USE_BOPT_g)
815     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
816 #endif
817     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
818     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
819     rmax = imax[brow];  /* maximum space allocated for this row */
820     nrow = ailen[brow]; /* actual length of this row */
821     low  = 0;
822 
823     for (l=0; l<n; l++) { /* loop over added columns */
824       if (in[l] < 0) continue;
825 #if defined(PETSC_USE_BOPT_g)
826       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
827 #endif
828       col = in[l];
829       bcol = col/bs;              /* block col number */
830 
831       if (brow <= bcol){
832         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
833         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
834           /* element value a(k,l) */
835           if (roworiented) {
836             value = v[l + k*n];
837           } else {
838             value = v[k + l*m];
839           }
840 
841           /* move pointer bap to a(k,l) quickly and add/insert value */
842           if (!sorted) low = 0; high = nrow;
843           while (high-low > 7) {
844             t = (low+high)/2;
845             if (rp[t] > bcol) high = t;
846             else              low  = t;
847           }
848           for (i=low; i<high; i++) {
849             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
850             if (rp[i] > bcol) break;
851             if (rp[i] == bcol) {
852               bap  = ap +  bs2*i + bs*cidx + ridx;
853               if (is == ADD_VALUES) *bap += value;
854               else                  *bap  = value;
855               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
856               if (brow == bcol && ridx < cidx){
857                 bap  = ap +  bs2*i + bs*ridx + cidx;
858                 if (is == ADD_VALUES) *bap += value;
859                 else                  *bap  = value;
860               }
861               goto noinsert1;
862             }
863           }
864 
865       if (nonew == 1) goto noinsert1;
866       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
867       if (nrow >= rmax) {
868         /* there is no extra room in row, therefore enlarge */
869         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
870         MatScalar *new_a;
871 
872         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873 
874         /* Malloc new storage space */
875         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
876         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
877         new_j = (int*)(new_a + bs2*new_nz);
878         new_i = new_j + new_nz;
879 
880         /* copy over old data into new slots */
881         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
882         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
883         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
884         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
885         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
888         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
889         /* free up old matrix storage */
890         ierr = PetscFree(a->a);CHKERRQ(ierr);
891         if (!a->singlemalloc) {
892           ierr = PetscFree(a->i);CHKERRQ(ierr);
893           ierr = PetscFree(a->j);CHKERRQ(ierr);
894         }
895         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
896         a->singlemalloc = PETSC_TRUE;
897 
898         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
899         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
900         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
901         a->maxnz += bs2*CHUNKSIZE;
902         a->reallocs++;
903         a->nz++;
904       }
905 
906       N = nrow++ - 1;
907       /* shift up all the later entries in this row */
908       for (ii=N; ii>=i; ii--) {
909         rp[ii+1] = rp[ii];
910         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
911       }
912       if (N>=i) {
913         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
914       }
915       rp[i]                      = bcol;
916       ap[bs2*i + bs*cidx + ridx] = value;
917       noinsert1:;
918       low = i;
919       /* } */
920         }
921       } /* end of if .. if..  */
922     }                     /* end of loop over added columns */
923     ailen[brow] = nrow;
924   }                       /* end of loop over added rows */
925 
926   PetscFunctionReturn(0);
927 }
928 
929 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
930 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
931 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
932 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
933 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
934 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
935 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
936 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
937 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
938 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
939 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
940 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
941 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
942 extern int MatZeroEntries_SeqSBAIJ(Mat);
943 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
944 
945 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
946 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
947 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
948 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
949 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
950 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
951 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
952 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
953 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
954 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
955 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
956 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
957 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
958 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
959 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
960 
961 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
962 
963 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
964 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
965 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
966 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
967 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
968 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
969 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
970 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
971 
972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
979 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
980 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
981 
982 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
983 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
984 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
985 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
986 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
987 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
988 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
989 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
990 
991 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
992 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
993 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
994 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
995 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
996 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
997 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
998 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
999 
1000 #ifdef HAVE_MatICCFactor
1001 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1004 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1005 {
1006   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1007   Mat         outA;
1008   int         ierr;
1009   PetscTruth  row_identity,col_identity;
1010 
1011   PetscFunctionBegin;
1012   /*
1013   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1014   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1015   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1016   if (!row_identity || !col_identity) {
1017     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1018   }
1019   */
1020 
1021   outA          = inA;
1022   inA->factor   = FACTOR_CHOLESKY;
1023 
1024   if (!a->diag) {
1025     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1026   }
1027   /*
1028       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1029       for ILU(0) factorization with natural ordering
1030   */
1031   switch (a->bs) {
1032   case 1:
1033     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1034     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1035     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1036     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1037   case 2:
1038     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1039     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1040     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1041     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1042     break;
1043   case 3:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1048     break;
1049   case 4:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1054     break;
1055   case 5:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1060     break;
1061   case 6:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1066     break;
1067   case 7:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1069     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1070     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1072     break;
1073   default:
1074     a->row        = row;
1075     a->icol       = col;
1076     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1077     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1078 
1079     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1080     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1081     PetscLogObjectParent(inA,a->icol);
1082 
1083     if (!a->solve_work) {
1084       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1085       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1086     }
1087   }
1088 
1089   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1090 
1091   PetscFunctionReturn(0);
1092 }
1093 #endif
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1097 int MatPrintHelp_SeqSBAIJ(Mat A)
1098 {
1099   static PetscTruth called = PETSC_FALSE;
1100   MPI_Comm          comm = A->comm;
1101   int               ierr;
1102 
1103   PetscFunctionBegin;
1104   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1105   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1106   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 EXTERN_C_BEGIN
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1113 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1114 {
1115   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1116   int         i,nz,n;
1117 
1118   PetscFunctionBegin;
1119   nz = baij->maxnz;
1120   n  = mat->n;
1121   for (i=0; i<nz; i++) {
1122     baij->j[i] = indices[i];
1123   }
1124   baij->nz = nz;
1125   for (i=0; i<n; i++) {
1126     baij->ilen[i] = baij->imax[i];
1127   }
1128 
1129   PetscFunctionReturn(0);
1130 }
1131 EXTERN_C_END
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1135 /*@
1136     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1137        in the matrix.
1138 
1139   Input Parameters:
1140 +  mat     - the SeqSBAIJ matrix
1141 -  indices - the column indices
1142 
1143   Level: advanced
1144 
1145   Notes:
1146     This can be called if you have precomputed the nonzero structure of the
1147   matrix and want to provide it to the matrix object to improve the performance
1148   of the MatSetValues() operation.
1149 
1150     You MUST have set the correct numbers of nonzeros per row in the call to
1151   MatCreateSeqSBAIJ().
1152 
1153     MUST be called before any calls to MatSetValues()
1154 
1155 .seealso: MatCreateSeqSBAIJ
1156 @*/
1157 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1158 {
1159   int ierr,(*f)(Mat,int *);
1160 
1161   PetscFunctionBegin;
1162   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1163   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1164   if (f) {
1165     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1166   } else {
1167     SETERRQ(1,"Wrong type of matrix to set column indices");
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1174 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1175 {
1176   int        ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1185 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1186 {
1187   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1188   PetscFunctionBegin;
1189   *array = a->a;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1195 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1196 {
1197   PetscFunctionBegin;
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #include "petscblaslapack.h"
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1204 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1205 {
1206   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1207   int          ierr,one=1,i,bs=y->bs,bs2,j;
1208 
1209   PetscFunctionBegin;
1210   if (str == SAME_NONZERO_PATTERN) {
1211     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1212   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1213     if (y->xtoy && y->XtoY != X) {
1214       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1215       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1216     }
1217     if (!y->xtoy) { /* get xtoy */
1218       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1219       y->XtoY = X;
1220     }
1221     bs2 = bs*bs;
1222     for (i=0; i<x->nz; i++) {
1223       j = 0;
1224       while (j < bs2){
1225         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1226         j++;
1227       }
1228     }
1229     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1230   } else {
1231     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1238 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1239 {
1240   PetscFunctionBegin;
1241   *flg = PETSC_TRUE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1247 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1248 {
1249   PetscFunctionBegin;
1250   *flg = PETSC_TRUE;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1256 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1257 {
1258   PetscFunctionBegin;
1259   *flg = PETSC_FALSE;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /* -------------------------------------------------------------------*/
1264 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1265        MatGetRow_SeqSBAIJ,
1266        MatRestoreRow_SeqSBAIJ,
1267        MatMult_SeqSBAIJ_N,
1268 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1269        MatMultTranspose_SeqSBAIJ,
1270        MatMultTransposeAdd_SeqSBAIJ,
1271        MatSolve_SeqSBAIJ_N,
1272        0,
1273        0,
1274 /*10*/ 0,
1275        0,
1276        MatCholeskyFactor_SeqSBAIJ,
1277        MatRelax_SeqSBAIJ,
1278        MatTranspose_SeqSBAIJ,
1279 /*15*/ MatGetInfo_SeqSBAIJ,
1280        MatEqual_SeqSBAIJ,
1281        MatGetDiagonal_SeqSBAIJ,
1282        MatDiagonalScale_SeqSBAIJ,
1283        MatNorm_SeqSBAIJ,
1284 /*20*/ 0,
1285        MatAssemblyEnd_SeqSBAIJ,
1286        0,
1287        MatSetOption_SeqSBAIJ,
1288        MatZeroEntries_SeqSBAIJ,
1289 /*25*/ MatZeroRows_SeqSBAIJ,
1290        0,
1291        0,
1292        MatCholeskyFactorSymbolic_SeqSBAIJ,
1293        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1294 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1295        0,
1296        MatICCFactorSymbolic_SeqSBAIJ,
1297        MatGetArray_SeqSBAIJ,
1298        MatRestoreArray_SeqSBAIJ,
1299 /*35*/ MatDuplicate_SeqSBAIJ,
1300        0,
1301        0,
1302        0,
1303        0,
1304 /*40*/ MatAXPY_SeqSBAIJ,
1305        MatGetSubMatrices_SeqSBAIJ,
1306        MatIncreaseOverlap_SeqSBAIJ,
1307        MatGetValues_SeqSBAIJ,
1308        0,
1309 /*45*/ MatPrintHelp_SeqSBAIJ,
1310        MatScale_SeqSBAIJ,
1311        0,
1312        0,
1313        0,
1314 /*50*/ MatGetBlockSize_SeqSBAIJ,
1315        MatGetRowIJ_SeqSBAIJ,
1316        MatRestoreRowIJ_SeqSBAIJ,
1317        0,
1318        0,
1319 /*55*/ 0,
1320        0,
1321        0,
1322        0,
1323        MatSetValuesBlocked_SeqSBAIJ,
1324 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1325        0,
1326        0,
1327        MatGetPetscMaps_Petsc,
1328        0,
1329 /*65*/ 0,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*70*/ MatGetRowMax_SeqSBAIJ,
1335        0,
1336        0,
1337        0,
1338        0,
1339 /*75*/ 0,
1340        0,
1341        0,
1342        0,
1343        0,
1344 /*80*/ 0,
1345        0,
1346        0,
1347 #if !defined(PETSC_USE_COMPLEX)
1348        MatGetInertia_SeqSBAIJ,
1349 #else
1350        0,
1351 #endif
1352 /*85*/ MatLoad_SeqSBAIJ,
1353        MatIsSymmetric_SeqSBAIJ,
1354        MatIsStructurallySymmetric_SeqSBAIJ,
1355        MatIsHermitian_SeqSBAIJ
1356 };
1357 
1358 EXTERN_C_BEGIN
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1361 int MatStoreValues_SeqSBAIJ(Mat mat)
1362 {
1363   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1364   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1365   int         ierr;
1366 
1367   PetscFunctionBegin;
1368   if (aij->nonew != 1) {
1369     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1370   }
1371 
1372   /* allocate space for values if not already there */
1373   if (!aij->saved_values) {
1374     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1375   }
1376 
1377   /* copy values over */
1378   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 EXTERN_C_END
1382 
1383 EXTERN_C_BEGIN
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1386 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1387 {
1388   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1389   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1390 
1391   PetscFunctionBegin;
1392   if (aij->nonew != 1) {
1393     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1394   }
1395   if (!aij->saved_values) {
1396     SETERRQ(1,"Must call MatStoreValues(A);first");
1397   }
1398 
1399   /* copy values over */
1400   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 EXTERN_C_END
1404 
1405 EXTERN_C_BEGIN
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1408 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1409 {
1410   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1411   int          i,len,ierr,mbs,bs2;
1412   PetscTruth   flg;
1413 
1414   PetscFunctionBegin;
1415   B->preallocated = PETSC_TRUE;
1416   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1417   mbs  = B->m/bs;
1418   bs2  = bs*bs;
1419 
1420   if (mbs*bs != B->m) {
1421     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1422   }
1423 
1424   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1425   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1426   if (nnz) {
1427     for (i=0; i<mbs; i++) {
1428       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1429       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1430     }
1431   }
1432 
1433   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1434   if (!flg) {
1435     switch (bs) {
1436     case 1:
1437       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1438       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1439       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1440       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1441       B->ops->mult            = MatMult_SeqSBAIJ_1;
1442       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1443       break;
1444     case 2:
1445       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1446       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1447       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1448       B->ops->mult            = MatMult_SeqSBAIJ_2;
1449       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1450       break;
1451     case 3:
1452       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1453       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1454       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1455       B->ops->mult            = MatMult_SeqSBAIJ_3;
1456       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1457       break;
1458     case 4:
1459       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1460       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1461       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1462       B->ops->mult            = MatMult_SeqSBAIJ_4;
1463       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1464       break;
1465     case 5:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1468       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1469       B->ops->mult            = MatMult_SeqSBAIJ_5;
1470       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1471       break;
1472     case 6:
1473       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1474       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1475       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1476       B->ops->mult            = MatMult_SeqSBAIJ_6;
1477       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1478       break;
1479     case 7:
1480       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1481       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1482       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1483       B->ops->mult            = MatMult_SeqSBAIJ_7;
1484       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1485       break;
1486     }
1487   }
1488 
1489   b->mbs = mbs;
1490   b->nbs = mbs;
1491   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1492   if (!nnz) {
1493     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1494     else if (nz <= 0)        nz = 1;
1495     for (i=0; i<mbs; i++) {
1496       b->imax[i] = nz;
1497     }
1498     nz = nz*mbs; /* total nz */
1499   } else {
1500     nz = 0;
1501     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1502   }
1503   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1504 
1505   /* allocate the matrix space */
1506   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1507   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1508   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509   b->j = (int*)(b->a + nz*bs2);
1510   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1511   b->i = b->j + nz;
1512   b->singlemalloc = PETSC_TRUE;
1513 
1514   /* pointer to beginning of each row */
1515   b->i[0] = 0;
1516   for (i=1; i<mbs+1; i++) {
1517     b->i[i] = b->i[i-1] + b->imax[i-1];
1518   }
1519 
1520   /* b->ilen will count nonzeros in each block row so far. */
1521   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1522   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1523   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1524 
1525   b->bs               = bs;
1526   b->bs2              = bs2;
1527   b->nz             = 0;
1528   b->maxnz          = nz*bs2;
1529 
1530   b->inew             = 0;
1531   b->jnew             = 0;
1532   b->anew             = 0;
1533   b->a2anew           = 0;
1534   b->permute          = PETSC_FALSE;
1535   PetscFunctionReturn(0);
1536 }
1537 EXTERN_C_END
1538 
1539 EXTERN_C_BEGIN
1540 EXTERN int MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1541 EXTERN int MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1542 EXTERN_C_END
1543 
1544 /*MC
1545    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1546    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1547 
1548    Options Database Keys:
1549 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1550 
1551   Level: beginner
1552 
1553 .seealso: MatCreateSeqSBAIJ
1554 M*/
1555 
1556 EXTERN_C_BEGIN
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1559 int MatCreate_SeqSBAIJ(Mat B)
1560 {
1561   Mat_SeqSBAIJ *b;
1562   int          ierr,size;
1563 
1564   PetscFunctionBegin;
1565   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1566   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1567   B->m = B->M = PetscMax(B->m,B->M);
1568   B->n = B->N = PetscMax(B->n,B->N);
1569 
1570   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1571   B->data = (void*)b;
1572   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1573   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1574   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1575   B->ops->view        = MatView_SeqSBAIJ;
1576   B->factor           = 0;
1577   B->lupivotthreshold = 1.0;
1578   B->mapping          = 0;
1579   b->row              = 0;
1580   b->icol             = 0;
1581   b->reallocs         = 0;
1582   b->saved_values     = 0;
1583 
1584   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1585   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1586 
1587   b->sorted           = PETSC_FALSE;
1588   b->roworiented      = PETSC_TRUE;
1589   b->nonew            = 0;
1590   b->diag             = 0;
1591   b->solve_work       = 0;
1592   b->mult_work        = 0;
1593   B->spptr            = 0;
1594   b->keepzeroedrows   = PETSC_FALSE;
1595   b->xtoy             = 0;
1596   b->XtoY             = 0;
1597 
1598   b->inew             = 0;
1599   b->jnew             = 0;
1600   b->anew             = 0;
1601   b->a2anew           = 0;
1602   b->permute          = PETSC_FALSE;
1603 
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1605                                      "MatStoreValues_SeqSBAIJ",
1606                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1607   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1608                                      "MatRetrieveValues_SeqSBAIJ",
1609                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1611                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1612                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1614                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1615                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1617                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1618                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1620                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1621                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1622   PetscFunctionReturn(0);
1623 }
1624 EXTERN_C_END
1625 
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1628 /*@C
1629    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1630    compressed row) format.  For good matrix assembly performance the
1631    user should preallocate the matrix storage by setting the parameter nz
1632    (or the array nnz).  By setting these parameters accurately, performance
1633    during matrix assembly can be increased by more than a factor of 50.
1634 
1635    Collective on Mat
1636 
1637    Input Parameters:
1638 +  A - the symmetric matrix
1639 .  bs - size of block
1640 .  nz - number of block nonzeros per block row (same for all rows)
1641 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1642          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1643 
1644    Options Database Keys:
1645 .   -mat_no_unroll - uses code that does not unroll the loops in the
1646                      block calculations (much slower)
1647 .    -mat_block_size - size of the blocks to use
1648 
1649    Level: intermediate
1650 
1651    Notes:
1652    Specify the preallocated storage with either nz or nnz (not both).
1653    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1654    allocation.  For additional details, see the users manual chapter on
1655    matrices.
1656 
1657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1658 @*/
1659 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1660   int ierr,(*f)(Mat,int,int,const int[]);
1661 
1662   PetscFunctionBegin;
1663   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1664   if (f) {
1665     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1666   }
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatCreateSeqSBAIJ"
1672 /*@C
1673    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1674    compressed row) format.  For good matrix assembly performance the
1675    user should preallocate the matrix storage by setting the parameter nz
1676    (or the array nnz).  By setting these parameters accurately, performance
1677    during matrix assembly can be increased by more than a factor of 50.
1678 
1679    Collective on MPI_Comm
1680 
1681    Input Parameters:
1682 +  comm - MPI communicator, set to PETSC_COMM_SELF
1683 .  bs - size of block
1684 .  m - number of rows, or number of columns
1685 .  nz - number of block nonzeros per block row (same for all rows)
1686 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1687          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1688 
1689    Output Parameter:
1690 .  A - the symmetric matrix
1691 
1692    Options Database Keys:
1693 .   -mat_no_unroll - uses code that does not unroll the loops in the
1694                      block calculations (much slower)
1695 .    -mat_block_size - size of the blocks to use
1696 
1697    Level: intermediate
1698 
1699    Notes:
1700 
1701    Specify the preallocated storage with either nz or nnz (not both).
1702    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1703    allocation.  For additional details, see the users manual chapter on
1704    matrices.
1705 
1706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1707 @*/
1708 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1709 {
1710   int ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1714   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1715   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1721 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1722 {
1723   Mat         C;
1724   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1725   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1726 
1727   PetscFunctionBegin;
1728   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1729 
1730   *B = 0;
1731   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1732   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1733   c    = (Mat_SeqSBAIJ*)C->data;
1734 
1735   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1736   C->preallocated   = PETSC_TRUE;
1737   C->factor         = A->factor;
1738   c->row            = 0;
1739   c->icol           = 0;
1740   c->saved_values   = 0;
1741   c->keepzeroedrows = a->keepzeroedrows;
1742   C->assembled      = PETSC_TRUE;
1743 
1744   c->bs         = a->bs;
1745   c->bs2        = a->bs2;
1746   c->mbs        = a->mbs;
1747   c->nbs        = a->nbs;
1748 
1749   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1750   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1751   for (i=0; i<mbs; i++) {
1752     c->imax[i] = a->imax[i];
1753     c->ilen[i] = a->ilen[i];
1754   }
1755 
1756   /* allocate the matrix space */
1757   c->singlemalloc = PETSC_TRUE;
1758   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1759   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1760   c->j = (int*)(c->a + nz*bs2);
1761   c->i = c->j + nz;
1762   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1763   if (mbs > 0) {
1764     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1765     if (cpvalues == MAT_COPY_VALUES) {
1766       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1767     } else {
1768       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1769     }
1770   }
1771 
1772   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1773   c->sorted      = a->sorted;
1774   c->roworiented = a->roworiented;
1775   c->nonew       = a->nonew;
1776 
1777   if (a->diag) {
1778     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1779     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1780     for (i=0; i<mbs; i++) {
1781       c->diag[i] = a->diag[i];
1782     }
1783   } else c->diag        = 0;
1784   c->nz               = a->nz;
1785   c->maxnz            = a->maxnz;
1786   c->solve_work         = 0;
1787   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1788   c->mult_work          = 0;
1789   *B = C;
1790   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1796 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1797 {
1798   Mat_SeqSBAIJ *a;
1799   Mat          B;
1800   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1801   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1802   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1803   int          *masked,nmask,tmp,bs2,ishift;
1804   PetscScalar  *aa;
1805   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1809   bs2  = bs*bs;
1810 
1811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1812   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1813   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1814   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1815   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1816   M = header[1]; N = header[2]; nz = header[3];
1817 
1818   if (header[3] < 0) {
1819     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1820   }
1821 
1822   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1823 
1824   /*
1825      This code adds extra rows to make sure the number of rows is
1826     divisible by the blocksize
1827   */
1828   mbs        = M/bs;
1829   extra_rows = bs - M + bs*(mbs);
1830   if (extra_rows == bs) extra_rows = 0;
1831   else                  mbs++;
1832   if (extra_rows) {
1833     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1834   }
1835 
1836   /* read in row lengths */
1837   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1838   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1839   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1840 
1841   /* read in column indices */
1842   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1843   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1844   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1845 
1846   /* loop over row lengths determining block row lengths */
1847   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1848   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1849   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1850   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1851   masked   = mask + mbs;
1852   rowcount = 0; nzcount = 0;
1853   for (i=0; i<mbs; i++) {
1854     nmask = 0;
1855     for (j=0; j<bs; j++) {
1856       kmax = rowlengths[rowcount];
1857       for (k=0; k<kmax; k++) {
1858         tmp = jj[nzcount++]/bs;   /* block col. index */
1859         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1860       }
1861       rowcount++;
1862     }
1863     s_browlengths[i] += nmask;
1864 
1865     /* zero out the mask elements we set */
1866     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1867   }
1868 
1869   /* create our matrix */
1870   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1871   ierr = MatSetType(B,type);CHKERRQ(ierr);
1872   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1873   a = (Mat_SeqSBAIJ*)B->data;
1874 
1875   /* set matrix "i" values */
1876   a->i[0] = 0;
1877   for (i=1; i<= mbs; i++) {
1878     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1879     a->ilen[i-1] = s_browlengths[i-1];
1880   }
1881   a->nz = a->i[mbs];
1882 
1883   /* read in nonzero values */
1884   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1885   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1886   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1887 
1888   /* set "a" and "j" values into matrix */
1889   nzcount = 0; jcount = 0;
1890   for (i=0; i<mbs; i++) {
1891     nzcountb = nzcount;
1892     nmask    = 0;
1893     for (j=0; j<bs; j++) {
1894       kmax = rowlengths[i*bs+j];
1895       for (k=0; k<kmax; k++) {
1896         tmp = jj[nzcount++]/bs; /* block col. index */
1897         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1898       }
1899     }
1900     /* sort the masked values */
1901     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1902 
1903     /* set "j" values into matrix */
1904     maskcount = 1;
1905     for (j=0; j<nmask; j++) {
1906       a->j[jcount++]  = masked[j];
1907       mask[masked[j]] = maskcount++;
1908     }
1909 
1910     /* set "a" values into matrix */
1911     ishift = bs2*a->i[i];
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp       = jj[nzcountb]/bs ; /* block col. index */
1916         if (tmp >= i){
1917           block     = mask[tmp] - 1;
1918           point     = jj[nzcountb] - bs*tmp;
1919           idx       = ishift + bs2*block + j + bs*point;
1920           a->a[idx] = aa[nzcountb];
1921         }
1922         nzcountb++;
1923       }
1924     }
1925     /* zero out the mask elements we set */
1926     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1927   }
1928   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1929 
1930   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1931   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1932   ierr = PetscFree(aa);CHKERRQ(ierr);
1933   ierr = PetscFree(jj);CHKERRQ(ierr);
1934   ierr = PetscFree(mask);CHKERRQ(ierr);
1935 
1936   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938   ierr = MatView_Private(B);CHKERRQ(ierr);
1939   *A = B;
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1945 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1946 {
1947   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1948   MatScalar    *aa=a->a,*v,*v1;
1949   PetscScalar  *x,*b,*t,sum,d;
1950   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1951   int          nz,nz1,*vj,*vj1,i;
1952 
1953   PetscFunctionBegin;
1954   its = its*lits;
1955   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1956 
1957   if (bs > 1)
1958     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1959 
1960   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1961   if (xx != bb) {
1962     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1963   } else {
1964     b = x;
1965   }
1966 
1967   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1968 
1969   if (flag & SOR_ZERO_INITIAL_GUESS) {
1970     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1971       for (i=0; i<m; i++)
1972         t[i] = b[i];
1973 
1974       for (i=0; i<m; i++){
1975         d  = *(aa + ai[i]);  /* diag[i] */
1976         v  = aa + ai[i] + 1;
1977         vj = aj + ai[i] + 1;
1978         nz = ai[i+1] - ai[i] - 1;
1979         x[i] = omega*t[i]/d;
1980         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1981         PetscLogFlops(2*nz-1);
1982       }
1983     }
1984 
1985     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1986       for (i=0; i<m; i++)
1987         t[i] = b[i];
1988 
1989       for (i=0; i<m-1; i++){  /* update rhs */
1990         v  = aa + ai[i] + 1;
1991         vj = aj + ai[i] + 1;
1992         nz = ai[i+1] - ai[i] - 1;
1993         while (nz--) t[*vj++] -= x[i]*(*v++);
1994         PetscLogFlops(2*nz-1);
1995       }
1996       for (i=m-1; i>=0; i--){
1997         d  = *(aa + ai[i]);
1998         v  = aa + ai[i] + 1;
1999         vj = aj + ai[i] + 1;
2000         nz = ai[i+1] - ai[i] - 1;
2001         sum = t[i];
2002         while (nz--) sum -= x[*vj++]*(*v++);
2003         PetscLogFlops(2*nz-1);
2004         x[i] =   (1-omega)*x[i] + omega*sum/d;
2005       }
2006     }
2007     its--;
2008   }
2009 
2010   while (its--) {
2011     /*
2012        forward sweep:
2013        for i=0,...,m-1:
2014          sum[i] = (b[i] - U(i,:)x )/d[i];
2015          x[i]   = (1-omega)x[i] + omega*sum[i];
2016          b      = b - x[i]*U^T(i,:);
2017 
2018     */
2019     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2020       for (i=0; i<m; i++)
2021         t[i] = b[i];
2022 
2023       for (i=0; i<m; i++){
2024         d  = *(aa + ai[i]);  /* diag[i] */
2025         v  = aa + ai[i] + 1; v1=v;
2026         vj = aj + ai[i] + 1; vj1=vj;
2027         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2028         sum = t[i];
2029         while (nz1--) sum -= (*v1++)*x[*vj1++];
2030         x[i] = (1-omega)*x[i] + omega*sum/d;
2031         while (nz--) t[*vj++] -= x[i]*(*v++);
2032         PetscLogFlops(4*nz-2);
2033       }
2034     }
2035 
2036   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2037       /*
2038        backward sweep:
2039        b = b - x[i]*U^T(i,:), i=0,...,n-2
2040        for i=m-1,...,0:
2041          sum[i] = (b[i] - U(i,:)x )/d[i];
2042          x[i]   = (1-omega)x[i] + omega*sum[i];
2043       */
2044       for (i=0; i<m; i++)
2045         t[i] = b[i];
2046 
2047       for (i=0; i<m-1; i++){  /* update rhs */
2048         v  = aa + ai[i] + 1;
2049         vj = aj + ai[i] + 1;
2050         nz = ai[i+1] - ai[i] - 1;
2051         while (nz--) t[*vj++] -= x[i]*(*v++);
2052         PetscLogFlops(2*nz-1);
2053       }
2054       for (i=m-1; i>=0; i--){
2055         d  = *(aa + ai[i]);
2056         v  = aa + ai[i] + 1;
2057         vj = aj + ai[i] + 1;
2058         nz = ai[i+1] - ai[i] - 1;
2059         sum = t[i];
2060         while (nz--) sum -= x[*vj++]*(*v++);
2061         PetscLogFlops(2*nz-1);
2062         x[i] =   (1-omega)*x[i] + omega*sum/d;
2063       }
2064     }
2065   }
2066 
2067   ierr = PetscFree(t); CHKERRQ(ierr);
2068   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2069   if (bb != xx) {
2070     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2071   }
2072 
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 
2077 
2078 
2079 
2080 
2081