xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 69a612a9be7fbcc0ab27d96ce41c4e3795ac7f2a)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
93 {
94   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
99   ierr = PetscFree(a->a);CHKERRQ(ierr);
100   if (!a->singlemalloc) {
101     ierr = PetscFree(a->i);CHKERRQ(ierr);
102     ierr = PetscFree(a->j);CHKERRQ(ierr);
103   }
104   if (a->row) {
105     ierr = ISDestroy(a->row);CHKERRQ(ierr);
106   }
107   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
108   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
109   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
110   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
111   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
112   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
113   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
114   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
115   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
116 
117   if (a->inew){
118     ierr = PetscFree(a->inew);CHKERRQ(ierr);
119     a->inew = 0;
120   }
121   ierr = PetscFree(a);CHKERRQ(ierr);
122 
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
125   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
126   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
127   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
128   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
135 {
136   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   switch (op) {
140   case MAT_ROW_ORIENTED:
141     a->roworiented    = PETSC_TRUE;
142     break;
143   case MAT_COLUMN_ORIENTED:
144     a->roworiented    = PETSC_FALSE;
145     break;
146   case MAT_COLUMNS_SORTED:
147     a->sorted         = PETSC_TRUE;
148     break;
149   case MAT_COLUMNS_UNSORTED:
150     a->sorted         = PETSC_FALSE;
151     break;
152   case MAT_KEEP_ZEROED_ROWS:
153     a->keepzeroedrows = PETSC_TRUE;
154     break;
155   case MAT_NO_NEW_NONZERO_LOCATIONS:
156     a->nonew          = 1;
157     break;
158   case MAT_NEW_NONZERO_LOCATION_ERR:
159     a->nonew          = -1;
160     break;
161   case MAT_NEW_NONZERO_ALLOCATION_ERR:
162     a->nonew          = -2;
163     break;
164   case MAT_YES_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 0;
166     break;
167   case MAT_ROWS_SORTED:
168   case MAT_ROWS_UNSORTED:
169   case MAT_YES_NEW_DIAGONALS:
170   case MAT_IGNORE_OFF_PROC_ENTRIES:
171   case MAT_USE_HASH_TABLE:
172     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
173     break;
174   case MAT_NO_NEW_DIAGONALS:
175     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
176   case MAT_NOT_SYMMETRIC:
177   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
178   case MAT_HERMITIAN:
179     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
180   case MAT_SYMMETRIC:
181   case MAT_STRUCTURALLY_SYMMETRIC:
182   case MAT_NOT_HERMITIAN:
183   case MAT_SYMMETRY_ETERNAL:
184   case MAT_NOT_SYMMETRY_ETERNAL:
185     break;
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
197   PetscErrorCode ierr;
198   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
199   MatScalar    *aa,*aa_i;
200   PetscScalar  *v_i;
201 
202   PetscFunctionBegin;
203   bs  = A->bs;
204   ai  = a->i;
205   aj  = a->j;
206   aa  = a->a;
207   bs2 = a->bs2;
208 
209   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
210 
211   bn  = row/bs;   /* Block number */
212   bp  = row % bs; /* Block position */
213   M   = ai[bn+1] - ai[bn];
214   *ncols = bs*M;
215 
216   if (v) {
217     *v = 0;
218     if (*ncols) {
219       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
220       for (i=0; i<M; i++) { /* for each block in the block row */
221         v_i  = *v + i*bs;
222         aa_i = aa + bs2*(ai[bn] + i);
223         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
224       }
225     }
226   }
227 
228   if (cols) {
229     *cols = 0;
230     if (*ncols) {
231       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
232       for (i=0; i<M; i++) { /* for each block in the block row */
233         cols_i = *cols + i*bs;
234         itmp  = bs*aj[ai[bn] + i];
235         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
236       }
237     }
238   }
239 
240   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
241   /* this segment is currently removed, so only entries in the upper triangle are obtained */
242 #ifdef column_search
243   v_i    = *v    + M*bs;
244   cols_i = *cols + M*bs;
245   for (i=0; i<bn; i++){ /* for each block row */
246     M = ai[i+1] - ai[i];
247     for (j=0; j<M; j++){
248       itmp = aj[ai[i] + j];    /* block column value */
249       if (itmp == bn){
250         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
251         for (k=0; k<bs; k++) {
252           *cols_i++ = i*bs+k;
253           *v_i++    = aa_i[k];
254         }
255         *ncols += bs;
256         break;
257       }
258     }
259   }
260 #endif
261 
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNCT__
266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
268 {
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
273   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
280 {
281   PetscErrorCode ierr;
282   PetscFunctionBegin;
283   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
290 {
291   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
292   PetscErrorCode    ierr;
293   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
294   char              *name;
295   PetscViewerFormat format;
296 
297   PetscFunctionBegin;
298   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
299   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
300   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
301     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
302   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
303     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
304   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
305     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
306     for (i=0; i<a->mbs; i++) {
307       for (j=0; j<bs; j++) {
308         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
309         for (k=a->i[i]; k<a->i[i+1]; k++) {
310           for (l=0; l<bs; l++) {
311 #if defined(PETSC_USE_COMPLEX)
312             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
313               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
314                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
315             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
316               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
317                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
318             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
320             }
321 #else
322             if (a->a[bs2*k + l*bs + j] != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
324             }
325 #endif
326           }
327         }
328         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
329       }
330     }
331     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
332   } else {
333     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
334     for (i=0; i<a->mbs; i++) {
335       for (j=0; j<bs; j++) {
336         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
337         for (k=a->i[i]; k<a->i[i+1]; k++) {
338           for (l=0; l<bs; l++) {
339 #if defined(PETSC_USE_COMPLEX)
340             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
341               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
342                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
343             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
344               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
345                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             } else {
347               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
348             }
349 #else
350             ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
351 #endif
352           }
353         }
354         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
355       }
356     }
357     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
358   }
359   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
366 {
367   Mat            A = (Mat) Aa;
368   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
369   PetscErrorCode ierr;
370   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2,rank;
371   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
372   MatScalar      *aa;
373   MPI_Comm       comm;
374   PetscViewer    viewer;
375 
376   PetscFunctionBegin;
377   /*
378       This is nasty. If this is called from an originally parallel matrix
379    then all processes call this,but only the first has the matrix so the
380    rest should return immediately.
381   */
382   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
383   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
384   if (rank) PetscFunctionReturn(0);
385 
386   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
387 
388   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
389   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
390 
391   /* loop over matrix elements drawing boxes */
392   color = PETSC_DRAW_BLUE;
393   for (i=0,row=0; i<mbs; i++,row+=bs) {
394     for (j=a->i[i]; j<a->i[i+1]; j++) {
395       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
396       x_l = a->j[j]*bs; x_r = x_l + 1.0;
397       aa = a->a + j*bs2;
398       for (k=0; k<bs; k++) {
399         for (l=0; l<bs; l++) {
400           if (PetscRealPart(*aa++) >=  0.) continue;
401           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
402         }
403       }
404     }
405   }
406   color = PETSC_DRAW_CYAN;
407   for (i=0,row=0; i<mbs; i++,row+=bs) {
408     for (j=a->i[i]; j<a->i[i+1]; j++) {
409       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
410       x_l = a->j[j]*bs; x_r = x_l + 1.0;
411       aa = a->a + j*bs2;
412       for (k=0; k<bs; k++) {
413         for (l=0; l<bs; l++) {
414           if (PetscRealPart(*aa++) != 0.) continue;
415           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
416         }
417       }
418     }
419   }
420 
421   color = PETSC_DRAW_RED;
422   for (i=0,row=0; i<mbs; i++,row+=bs) {
423     for (j=a->i[i]; j<a->i[i+1]; j++) {
424       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
425       x_l = a->j[j]*bs; x_r = x_l + 1.0;
426       aa = a->a + j*bs2;
427       for (k=0; k<bs; k++) {
428         for (l=0; l<bs; l++) {
429           if (PetscRealPart(*aa++) <= 0.) continue;
430           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
431         }
432       }
433     }
434   }
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
440 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
441 {
442   PetscErrorCode ierr;
443   PetscReal    xl,yl,xr,yr,w,h;
444   PetscDraw    draw;
445   PetscTruth   isnull;
446 
447   PetscFunctionBegin;
448 
449   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
450   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
451 
452   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
453   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
454   xr += w;    yr += h;  xl = -w;     yl = -h;
455   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
456   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
457   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatView_SeqSBAIJ"
463 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
464 {
465   PetscErrorCode ierr;
466   PetscTruth iascii,isdraw;
467 
468   PetscFunctionBegin;
469   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
470   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
471   if (iascii){
472     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
473   } else if (isdraw) {
474     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
475   } else {
476     Mat B;
477     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
478     ierr = MatView(B,viewer);CHKERRQ(ierr);
479     ierr = MatDestroy(B);CHKERRQ(ierr);
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
487 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
488 {
489   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
490   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
491   int        *ai = a->i,*ailen = a->ilen;
492   int        brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
493   MatScalar  *ap,*aa = a->a,zero = 0.0;
494 
495   PetscFunctionBegin;
496   for (k=0; k<m; k++) { /* loop over rows */
497     row  = im[k]; brow = row/bs;
498     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
499     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
500     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
501     nrow = ailen[brow];
502     for (l=0; l<n; l++) { /* loop over columns */
503       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
504       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
505       col  = in[l] ;
506       bcol = col/bs;
507       cidx = col%bs;
508       ridx = row%bs;
509       high = nrow;
510       low  = 0; /* assume unsorted */
511       while (high-low > 5) {
512         t = (low+high)/2;
513         if (rp[t] > bcol) high = t;
514         else             low  = t;
515       }
516       for (i=low; i<high; i++) {
517         if (rp[i] > bcol) break;
518         if (rp[i] == bcol) {
519           *v++ = ap[bs2*i+bs*cidx+ridx];
520           goto finished;
521         }
522       }
523       *v++ = zero;
524       finished:;
525     }
526   }
527   PetscFunctionReturn(0);
528 }
529 
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
533 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
534 {
535   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
536   PetscErrorCode ierr;
537   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
538   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
539   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
540   PetscTruth      roworiented=a->roworiented;
541   const MatScalar *value = v;
542   MatScalar       *ap,*aa = a->a,*bap;
543 
544   PetscFunctionBegin;
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (k=0; k<m; k++) { /* loop over added rows */
551     row  = im[k];
552     if (row < 0) continue;
553 #if defined(PETSC_USE_BOPT_g)
554     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
555 #endif
556     rp   = aj + ai[row];
557     ap   = aa + bs2*ai[row];
558     rmax = imax[row];
559     nrow = ailen[row];
560     low  = 0;
561     for (l=0; l<n; l++) { /* loop over added columns */
562       if (in[l] < 0) continue;
563       col = in[l];
564 #if defined(PETSC_USE_BOPT_g)
565       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
566 #endif
567       if (col < row) continue; /* ignore lower triangular block */
568       if (roworiented) {
569         value = v + k*(stepval+bs)*bs + l*bs;
570       } else {
571         value = v + l*(stepval+bs)*bs + k*bs;
572       }
573       if (!sorted) low = 0; high = nrow;
574       while (high-low > 7) {
575         t = (low+high)/2;
576         if (rp[t] > col) high = t;
577         else             low  = t;
578       }
579       for (i=low; i<high; i++) {
580         if (rp[i] > col) break;
581         if (rp[i] == col) {
582           bap  = ap +  bs2*i;
583           if (roworiented) {
584             if (is == ADD_VALUES) {
585               for (ii=0; ii<bs; ii++,value+=stepval) {
586                 for (jj=ii; jj<bs2; jj+=bs) {
587                   bap[jj] += *value++;
588                 }
589               }
590             } else {
591               for (ii=0; ii<bs; ii++,value+=stepval) {
592                 for (jj=ii; jj<bs2; jj+=bs) {
593                   bap[jj] = *value++;
594                 }
595               }
596             }
597           } else {
598             if (is == ADD_VALUES) {
599               for (ii=0; ii<bs; ii++,value+=stepval) {
600                 for (jj=0; jj<bs; jj++) {
601                   *bap++ += *value++;
602                 }
603               }
604             } else {
605               for (ii=0; ii<bs; ii++,value+=stepval) {
606                 for (jj=0; jj<bs; jj++) {
607                   *bap++  = *value++;
608                 }
609               }
610             }
611           }
612           goto noinsert2;
613         }
614       }
615       if (nonew == 1) goto noinsert2;
616       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
617       if (nrow >= rmax) {
618         /* there is no extra room in row, therefore enlarge */
619         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
620         MatScalar *new_a;
621 
622         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
623 
624         /* malloc new storage space */
625         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
626 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
627         new_j   = (int*)(new_a + bs2*new_nz);
628         new_i   = new_j + new_nz;
629 
630         /* copy over old data into new slots */
631         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
632         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
633         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
634         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
635         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
636         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
637         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
638         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
639         /* free up old matrix storage */
640         ierr = PetscFree(a->a);CHKERRQ(ierr);
641         if (!a->singlemalloc) {
642           ierr = PetscFree(a->i);CHKERRQ(ierr);
643           ierr = PetscFree(a->j);CHKERRQ(ierr);
644         }
645         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
646         a->singlemalloc = PETSC_TRUE;
647 
648         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
649         rmax = imax[row] = imax[row] + CHUNKSIZE;
650         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
651         a->maxnz += bs2*CHUNKSIZE;
652         a->reallocs++;
653         a->nz++;
654       }
655       N = nrow++ - 1;
656       /* shift up all the later entries in this row */
657       for (ii=N; ii>=i; ii--) {
658         rp[ii+1] = rp[ii];
659         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
660       }
661       if (N >= i) {
662         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
663       }
664       rp[i] = col;
665       bap   = ap +  bs2*i;
666       if (roworiented) {
667         for (ii=0; ii<bs; ii++,value+=stepval) {
668           for (jj=ii; jj<bs2; jj+=bs) {
669             bap[jj] = *value++;
670           }
671         }
672       } else {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=0; jj<bs; jj++) {
675             *bap++  = *value++;
676           }
677         }
678       }
679       noinsert2:;
680       low = i;
681     }
682     ailen[row] = nrow;
683   }
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
689 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
690 {
691   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
692   PetscErrorCode ierr;
693   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
694   int        m = A->m,*ip,N,*ailen = a->ilen;
695   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
696   MatScalar  *aa = a->a,*ap;
697 
698   PetscFunctionBegin;
699   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
700 
701   if (m) rmax = ailen[0];
702   for (i=1; i<mbs; i++) {
703     /* move each row back by the amount of empty slots (fshift) before it*/
704     fshift += imax[i-1] - ailen[i-1];
705     rmax   = PetscMax(rmax,ailen[i]);
706     if (fshift) {
707       ip = aj + ai[i]; ap = aa + bs2*ai[i];
708       N = ailen[i];
709       for (j=0; j<N; j++) {
710         ip[j-fshift] = ip[j];
711         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
712       }
713     }
714     ai[i] = ai[i-1] + ailen[i-1];
715   }
716   if (mbs) {
717     fshift += imax[mbs-1] - ailen[mbs-1];
718     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
719   }
720   /* reset ilen and imax for each row */
721   for (i=0; i<mbs; i++) {
722     ailen[i] = imax[i] = ai[i+1] - ai[i];
723   }
724   a->nz = ai[mbs];
725 
726   /* diagonals may have moved, reset it */
727   if (a->diag) {
728     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
729   }
730   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
731            m,A->m,A->bs,fshift*bs2,a->nz*bs2);
732   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",
733            a->reallocs);
734   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
735   a->reallocs          = 0;
736   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
737 
738   PetscFunctionReturn(0);
739 }
740 
741 /*
742    This function returns an array of flags which indicate the locations of contiguous
743    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
744    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
745    Assume: sizes should be long enough to hold all the values.
746 */
747 #undef __FUNCT__
748 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
749 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
750 {
751   int        i,j,k,row;
752   PetscTruth flg;
753 
754   PetscFunctionBegin;
755   for (i=0,j=0; i<n; j++) {
756     row = idx[i];
757     if (row%bs!=0) { /* Not the begining of a block */
758       sizes[j] = 1;
759       i++;
760     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
761       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
762       i++;
763     } else { /* Begining of the block, so check if the complete block exists */
764       flg = PETSC_TRUE;
765       for (k=1; k<bs; k++) {
766         if (row+k != idx[i+k]) { /* break in the block */
767           flg = PETSC_FALSE;
768           break;
769         }
770       }
771       if (flg == PETSC_TRUE) { /* No break in the bs */
772         sizes[j] = bs;
773         i+= bs;
774       } else {
775         sizes[j] = 1;
776         i++;
777       }
778     }
779   }
780   *bs_max = j;
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
786 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
787 {
788   PetscFunctionBegin;
789   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
790 }
791 
792 /* Only add/insert a(i,j) with i<=j (blocks).
793    Any a(i,j) with i>j input by user is ingored.
794 */
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
799 {
800   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
801   PetscErrorCode ierr;
802   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
803   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
804   int         *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
805   int         ridx,cidx,bs2=a->bs2;
806   MatScalar   *ap,value,*aa=a->a,*bap;
807 
808   PetscFunctionBegin;
809 
810   for (k=0; k<m; k++) { /* loop over added rows */
811     row  = im[k];       /* row number */
812     brow = row/bs;      /* block row number */
813     if (row < 0) continue;
814 #if defined(PETSC_USE_BOPT_g)
815     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
816 #endif
817     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
818     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
819     rmax = imax[brow];  /* maximum space allocated for this row */
820     nrow = ailen[brow]; /* actual length of this row */
821     low  = 0;
822 
823     for (l=0; l<n; l++) { /* loop over added columns */
824       if (in[l] < 0) continue;
825 #if defined(PETSC_USE_BOPT_g)
826       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
827 #endif
828       col = in[l];
829       bcol = col/bs;              /* block col number */
830 
831       if (brow <= bcol){
832         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
833         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
834           /* element value a(k,l) */
835           if (roworiented) {
836             value = v[l + k*n];
837           } else {
838             value = v[k + l*m];
839           }
840 
841           /* move pointer bap to a(k,l) quickly and add/insert value */
842           if (!sorted) low = 0; high = nrow;
843           while (high-low > 7) {
844             t = (low+high)/2;
845             if (rp[t] > bcol) high = t;
846             else              low  = t;
847           }
848           for (i=low; i<high; i++) {
849             /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */
850             if (rp[i] > bcol) break;
851             if (rp[i] == bcol) {
852               bap  = ap +  bs2*i + bs*cidx + ridx;
853               if (is == ADD_VALUES) *bap += value;
854               else                  *bap  = value;
855               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
856               if (brow == bcol && ridx < cidx){
857                 bap  = ap +  bs2*i + bs*ridx + cidx;
858                 if (is == ADD_VALUES) *bap += value;
859                 else                  *bap  = value;
860               }
861               goto noinsert1;
862             }
863           }
864 
865       if (nonew == 1) goto noinsert1;
866       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
867       if (nrow >= rmax) {
868         /* there is no extra room in row, therefore enlarge */
869         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
870         MatScalar *new_a;
871 
872         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
873 
874         /* Malloc new storage space */
875         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
876         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
877         new_j = (int*)(new_a + bs2*new_nz);
878         new_i = new_j + new_nz;
879 
880         /* copy over old data into new slots */
881         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
882         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
883         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
884         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
885         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
888         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
889         /* free up old matrix storage */
890         ierr = PetscFree(a->a);CHKERRQ(ierr);
891         if (!a->singlemalloc) {
892           ierr = PetscFree(a->i);CHKERRQ(ierr);
893           ierr = PetscFree(a->j);CHKERRQ(ierr);
894         }
895         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
896         a->singlemalloc = PETSC_TRUE;
897 
898         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
899         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
900         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
901         a->maxnz += bs2*CHUNKSIZE;
902         a->reallocs++;
903         a->nz++;
904       }
905 
906       N = nrow++ - 1;
907       /* shift up all the later entries in this row */
908       for (ii=N; ii>=i; ii--) {
909         rp[ii+1] = rp[ii];
910         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
911       }
912       if (N>=i) {
913         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
914       }
915       rp[i]                      = bcol;
916       ap[bs2*i + bs*cidx + ridx] = value;
917       noinsert1:;
918       low = i;
919       /* } */
920         }
921       } /* end of if .. if..  */
922     }                     /* end of loop over added columns */
923     ailen[brow] = nrow;
924   }                       /* end of loop over added rows */
925 
926   PetscFunctionReturn(0);
927 }
928 
929 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
930 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
931 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
932 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
933 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
934 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
935 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
936 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
937 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
938 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
939 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
940 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
941 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
942 
943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
951 
952 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
953 
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
962 
963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
971 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
972 
973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
981 
982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
990 
991 #ifdef HAVE_MatICCFactor
992 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
993 #undef __FUNCT__
994 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
995 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
996 {
997   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
998   Mat         outA;
999   PetscErrorCode ierr;
1000   PetscTruth  row_identity,col_identity;
1001 
1002   PetscFunctionBegin;
1003   outA          = inA;
1004   inA->factor   = FACTOR_CHOLESKY;
1005 
1006   if (!a->diag) {
1007     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1008   }
1009   /*
1010       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1011       for ILU(0) factorization with natural ordering
1012   */
1013   switch (a->bs) {
1014   case 1:
1015     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1016     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1017     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1018     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1019   case 2:
1020     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1021     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1022     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1023     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1024     break;
1025   case 3:
1026     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1027     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1028     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1029     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1030     break;
1031   case 4:
1032     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1033     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1034     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1035     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1036     break;
1037   case 5:
1038     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1039     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1040     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1041     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1042     break;
1043   case 6:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1048     break;
1049   case 7:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1051     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1052     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1054     break;
1055   default:
1056     a->row        = row;
1057     a->icol       = col;
1058     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1059     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1060 
1061     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1062     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1063     PetscLogObjectParent(inA,a->icol);
1064 
1065     if (!a->solve_work) {
1066       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1067       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1068     }
1069   }
1070 
1071   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 #endif
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1079 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1080 {
1081   static PetscTruth called = PETSC_FALSE;
1082   MPI_Comm          comm = A->comm;
1083   PetscErrorCode ierr;
1084 
1085   PetscFunctionBegin;
1086   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1087   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1088   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 EXTERN_C_BEGIN
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1095 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1096 {
1097   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1098   int         i,nz,n;
1099 
1100   PetscFunctionBegin;
1101   nz = baij->maxnz;
1102   n  = mat->n;
1103   for (i=0; i<nz; i++) {
1104     baij->j[i] = indices[i];
1105   }
1106   baij->nz = nz;
1107   for (i=0; i<n; i++) {
1108     baij->ilen[i] = baij->imax[i];
1109   }
1110 
1111   PetscFunctionReturn(0);
1112 }
1113 EXTERN_C_END
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1117 /*@
1118     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1119        in the matrix.
1120 
1121   Input Parameters:
1122 +  mat     - the SeqSBAIJ matrix
1123 -  indices - the column indices
1124 
1125   Level: advanced
1126 
1127   Notes:
1128     This can be called if you have precomputed the nonzero structure of the
1129   matrix and want to provide it to the matrix object to improve the performance
1130   of the MatSetValues() operation.
1131 
1132     You MUST have set the correct numbers of nonzeros per row in the call to
1133   MatCreateSeqSBAIJ().
1134 
1135     MUST be called before any calls to MatSetValues()
1136 
1137 .seealso: MatCreateSeqSBAIJ
1138 @*/
1139 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1140 {
1141   PetscErrorCode ierr,(*f)(Mat,int *);
1142 
1143   PetscFunctionBegin;
1144   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1145   PetscValidPointer(indices,2);
1146   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1147   if (f) {
1148     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1149   } else {
1150     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1157 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1158 {
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1168 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1169 {
1170   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1171   PetscFunctionBegin;
1172   *array = a->a;
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1178 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1179 {
1180   PetscFunctionBegin;
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #include "petscblaslapack.h"
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1187 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1188 {
1189   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1190   PetscErrorCode ierr;
1191   PetscInt       i,bs=Y->bs,bs2,j;
1192   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1193 
1194   PetscFunctionBegin;
1195   if (str == SAME_NONZERO_PATTERN) {
1196     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1197   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1198     if (y->xtoy && y->XtoY != X) {
1199       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1200       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1201     }
1202     if (!y->xtoy) { /* get xtoy */
1203       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1204       y->XtoY = X;
1205     }
1206     bs2 = bs*bs;
1207     for (i=0; i<x->nz; i++) {
1208       j = 0;
1209       while (j < bs2){
1210         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1211         j++;
1212       }
1213     }
1214     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1215   } else {
1216     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1217   }
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1223 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1224 {
1225   PetscFunctionBegin;
1226   *flg = PETSC_TRUE;
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1232 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1233 {
1234   PetscFunctionBegin;
1235   *flg = PETSC_TRUE;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1241 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1242 {
1243   PetscFunctionBegin;
1244   *flg = PETSC_FALSE;
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 /* -------------------------------------------------------------------*/
1249 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1250        MatGetRow_SeqSBAIJ,
1251        MatRestoreRow_SeqSBAIJ,
1252        MatMult_SeqSBAIJ_N,
1253 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1254        MatMult_SeqSBAIJ_N,
1255        MatMultAdd_SeqSBAIJ_N,
1256        MatSolve_SeqSBAIJ_N,
1257        0,
1258        0,
1259 /*10*/ 0,
1260        0,
1261        MatCholeskyFactor_SeqSBAIJ,
1262        MatRelax_SeqSBAIJ,
1263        MatTranspose_SeqSBAIJ,
1264 /*15*/ MatGetInfo_SeqSBAIJ,
1265        MatEqual_SeqSBAIJ,
1266        MatGetDiagonal_SeqSBAIJ,
1267        MatDiagonalScale_SeqSBAIJ,
1268        MatNorm_SeqSBAIJ,
1269 /*20*/ 0,
1270        MatAssemblyEnd_SeqSBAIJ,
1271        0,
1272        MatSetOption_SeqSBAIJ,
1273        MatZeroEntries_SeqSBAIJ,
1274 /*25*/ MatZeroRows_SeqSBAIJ,
1275        0,
1276        0,
1277        MatCholeskyFactorSymbolic_SeqSBAIJ,
1278        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1279 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1280        0,
1281        MatICCFactorSymbolic_SeqSBAIJ,
1282        MatGetArray_SeqSBAIJ,
1283        MatRestoreArray_SeqSBAIJ,
1284 /*35*/ MatDuplicate_SeqSBAIJ,
1285        0,
1286        0,
1287        0,
1288        0,
1289 /*40*/ MatAXPY_SeqSBAIJ,
1290        MatGetSubMatrices_SeqSBAIJ,
1291        MatIncreaseOverlap_SeqSBAIJ,
1292        MatGetValues_SeqSBAIJ,
1293        0,
1294 /*45*/ MatPrintHelp_SeqSBAIJ,
1295        MatScale_SeqSBAIJ,
1296        0,
1297        0,
1298        0,
1299 /*50*/ 0,
1300        MatGetRowIJ_SeqSBAIJ,
1301        MatRestoreRowIJ_SeqSBAIJ,
1302        0,
1303        0,
1304 /*55*/ 0,
1305        0,
1306        0,
1307        0,
1308        MatSetValuesBlocked_SeqSBAIJ,
1309 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1310        0,
1311        0,
1312        MatGetPetscMaps_Petsc,
1313        0,
1314 /*65*/ 0,
1315        0,
1316        0,
1317        0,
1318        0,
1319 /*70*/ MatGetRowMax_SeqSBAIJ,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*75*/ 0,
1325        0,
1326        0,
1327        0,
1328        0,
1329 /*80*/ 0,
1330        0,
1331        0,
1332 #if !defined(PETSC_USE_COMPLEX)
1333        MatGetInertia_SeqSBAIJ,
1334 #else
1335        0,
1336 #endif
1337        MatLoad_SeqSBAIJ,
1338 /*85*/ MatIsSymmetric_SeqSBAIJ,
1339        MatIsHermitian_SeqSBAIJ,
1340        MatIsStructurallySymmetric_SeqSBAIJ,
1341        0,
1342        0,
1343 /*90*/ 0,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*95*/ 0,
1349        0,
1350        0,
1351        0};
1352 
1353 EXTERN_C_BEGIN
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1356 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1357 {
1358   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1359   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   if (aij->nonew != 1) {
1364     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1365   }
1366 
1367   /* allocate space for values if not already there */
1368   if (!aij->saved_values) {
1369     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1370   }
1371 
1372   /* copy values over */
1373   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 EXTERN_C_END
1377 
1378 EXTERN_C_BEGIN
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1381 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1382 {
1383   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1384   PetscErrorCode ierr;
1385   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1386 
1387   PetscFunctionBegin;
1388   if (aij->nonew != 1) {
1389     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1390   }
1391   if (!aij->saved_values) {
1392     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1393   }
1394 
1395   /* copy values over */
1396   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 EXTERN_C_END
1400 
1401 EXTERN_C_BEGIN
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1404 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1405 {
1406   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1407   PetscErrorCode ierr;
1408   int          i,len,mbs,bs2;
1409   PetscTruth   flg;
1410 
1411   PetscFunctionBegin;
1412   B->preallocated = PETSC_TRUE;
1413   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1414   mbs  = B->m/bs;
1415   bs2  = bs*bs;
1416 
1417   if (mbs*bs != B->m) {
1418     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1419   }
1420 
1421   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1422   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1423   if (nnz) {
1424     for (i=0; i<mbs; i++) {
1425       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1426       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1427     }
1428   }
1429 
1430   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1431   if (!flg) {
1432     switch (bs) {
1433     case 1:
1434       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1435       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1436       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1437       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1438       B->ops->mult            = MatMult_SeqSBAIJ_1;
1439       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1440       break;
1441     case 2:
1442       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1443       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1444       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1445       B->ops->mult            = MatMult_SeqSBAIJ_2;
1446       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1447       break;
1448     case 3:
1449       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1450       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1451       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1452       B->ops->mult            = MatMult_SeqSBAIJ_3;
1453       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1454       break;
1455     case 4:
1456       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1457       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1458       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1459       B->ops->mult            = MatMult_SeqSBAIJ_4;
1460       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1461       break;
1462     case 5:
1463       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1464       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1465       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1466       B->ops->mult            = MatMult_SeqSBAIJ_5;
1467       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1468       break;
1469     case 6:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1471       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1472       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1473       B->ops->mult            = MatMult_SeqSBAIJ_6;
1474       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1475       break;
1476     case 7:
1477       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1478       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1479       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1480       B->ops->mult            = MatMult_SeqSBAIJ_7;
1481       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1482       break;
1483     }
1484   }
1485 
1486   b->mbs = mbs;
1487   b->nbs = mbs;
1488   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1489   if (!nnz) {
1490     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1491     else if (nz <= 0)        nz = 1;
1492     for (i=0; i<mbs; i++) {
1493       b->imax[i] = nz;
1494     }
1495     nz = nz*mbs; /* total nz */
1496   } else {
1497     nz = 0;
1498     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1499   }
1500   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1501 
1502   /* allocate the matrix space */
1503   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1504   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1505   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1506   b->j = (int*)(b->a + nz*bs2);
1507   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1508   b->i = b->j + nz;
1509   b->singlemalloc = PETSC_TRUE;
1510 
1511   /* pointer to beginning of each row */
1512   b->i[0] = 0;
1513   for (i=1; i<mbs+1; i++) {
1514     b->i[i] = b->i[i-1] + b->imax[i-1];
1515   }
1516 
1517   /* b->ilen will count nonzeros in each block row so far. */
1518   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1519   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1520   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1521 
1522   B->bs               = bs;
1523   b->bs2              = bs2;
1524   b->nz             = 0;
1525   b->maxnz          = nz*bs2;
1526 
1527   b->inew             = 0;
1528   b->jnew             = 0;
1529   b->anew             = 0;
1530   b->a2anew           = 0;
1531   b->permute          = PETSC_FALSE;
1532   PetscFunctionReturn(0);
1533 }
1534 EXTERN_C_END
1535 
1536 EXTERN_C_BEGIN
1537 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1539 EXTERN_C_END
1540 
1541 /*MC
1542    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1543    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1544 
1545    Options Database Keys:
1546 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1547 
1548   Level: beginner
1549 
1550 .seealso: MatCreateSeqSBAIJ
1551 M*/
1552 
1553 EXTERN_C_BEGIN
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1556 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1557 {
1558   Mat_SeqSBAIJ *b;
1559   PetscErrorCode ierr;
1560   int size;
1561 
1562   PetscFunctionBegin;
1563   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1564   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1565   B->m = B->M = PetscMax(B->m,B->M);
1566   B->n = B->N = PetscMax(B->n,B->N);
1567 
1568   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1569   B->data = (void*)b;
1570   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1571   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1572   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1573   B->ops->view        = MatView_SeqSBAIJ;
1574   B->factor           = 0;
1575   B->lupivotthreshold = 1.0;
1576   B->mapping          = 0;
1577   b->row              = 0;
1578   b->icol             = 0;
1579   b->reallocs         = 0;
1580   b->saved_values     = 0;
1581 
1582   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1583   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1584 
1585   b->sorted           = PETSC_FALSE;
1586   b->roworiented      = PETSC_TRUE;
1587   b->nonew            = 0;
1588   b->diag             = 0;
1589   b->solve_work       = 0;
1590   b->mult_work        = 0;
1591   B->spptr            = 0;
1592   b->keepzeroedrows   = PETSC_FALSE;
1593   b->xtoy             = 0;
1594   b->XtoY             = 0;
1595 
1596   b->inew             = 0;
1597   b->jnew             = 0;
1598   b->anew             = 0;
1599   b->a2anew           = 0;
1600   b->permute          = PETSC_FALSE;
1601 
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1603                                      "MatStoreValues_SeqSBAIJ",
1604                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1606                                      "MatRetrieveValues_SeqSBAIJ",
1607                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1609                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1610                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1612                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1613                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1615                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1616                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1618                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1619                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1620 
1621   B->symmetric                  = PETSC_TRUE;
1622   B->structurally_symmetric     = PETSC_TRUE;
1623   B->symmetric_set              = PETSC_TRUE;
1624   B->structurally_symmetric_set = PETSC_TRUE;
1625   PetscFunctionReturn(0);
1626 }
1627 EXTERN_C_END
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1631 /*@C
1632    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1633    compressed row) format.  For good matrix assembly performance the
1634    user should preallocate the matrix storage by setting the parameter nz
1635    (or the array nnz).  By setting these parameters accurately, performance
1636    during matrix assembly can be increased by more than a factor of 50.
1637 
1638    Collective on Mat
1639 
1640    Input Parameters:
1641 +  A - the symmetric matrix
1642 .  bs - size of block
1643 .  nz - number of block nonzeros per block row (same for all rows)
1644 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1645          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1646 
1647    Options Database Keys:
1648 .   -mat_no_unroll - uses code that does not unroll the loops in the
1649                      block calculations (much slower)
1650 .    -mat_block_size - size of the blocks to use
1651 
1652    Level: intermediate
1653 
1654    Notes:
1655    Specify the preallocated storage with either nz or nnz (not both).
1656    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1657    allocation.  For additional details, see the users manual chapter on
1658    matrices.
1659 
1660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1661 @*/
1662 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1663   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1667   if (f) {
1668     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1669   }
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatCreateSeqSBAIJ"
1675 /*@C
1676    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1677    compressed row) format.  For good matrix assembly performance the
1678    user should preallocate the matrix storage by setting the parameter nz
1679    (or the array nnz).  By setting these parameters accurately, performance
1680    during matrix assembly can be increased by more than a factor of 50.
1681 
1682    Collective on MPI_Comm
1683 
1684    Input Parameters:
1685 +  comm - MPI communicator, set to PETSC_COMM_SELF
1686 .  bs - size of block
1687 .  m - number of rows, or number of columns
1688 .  nz - number of block nonzeros per block row (same for all rows)
1689 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1690          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1691 
1692    Output Parameter:
1693 .  A - the symmetric matrix
1694 
1695    Options Database Keys:
1696 .   -mat_no_unroll - uses code that does not unroll the loops in the
1697                      block calculations (much slower)
1698 .    -mat_block_size - size of the blocks to use
1699 
1700    Level: intermediate
1701 
1702    Notes:
1703 
1704    Specify the preallocated storage with either nz or nnz (not both).
1705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1706    allocation.  For additional details, see the users manual chapter on
1707    matrices.
1708 
1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1710 @*/
1711 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1712 {
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1717   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1718   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1724 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1725 {
1726   Mat          C;
1727   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1728   PetscErrorCode ierr;
1729   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1730 
1731   PetscFunctionBegin;
1732   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1733 
1734   *B = 0;
1735   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1736   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1737   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1738   c    = (Mat_SeqSBAIJ*)C->data;
1739 
1740   C->preallocated   = PETSC_TRUE;
1741   C->factor         = A->factor;
1742   c->row            = 0;
1743   c->icol           = 0;
1744   c->saved_values   = 0;
1745   c->keepzeroedrows = a->keepzeroedrows;
1746   C->assembled      = PETSC_TRUE;
1747 
1748   C->M    = A->M;
1749   C->N    = A->N;
1750   C->bs   = A->bs;
1751   c->bs2  = a->bs2;
1752   c->mbs  = a->mbs;
1753   c->nbs  = a->nbs;
1754 
1755   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1756   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1757   for (i=0; i<mbs; i++) {
1758     c->imax[i] = a->imax[i];
1759     c->ilen[i] = a->ilen[i];
1760   }
1761 
1762   /* allocate the matrix space */
1763   c->singlemalloc = PETSC_TRUE;
1764   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1765   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1766   c->j = (int*)(c->a + nz*bs2);
1767   c->i = c->j + nz;
1768   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1769   if (mbs > 0) {
1770     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1771     if (cpvalues == MAT_COPY_VALUES) {
1772       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1773     } else {
1774       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1775     }
1776   }
1777 
1778   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1779   c->sorted      = a->sorted;
1780   c->roworiented = a->roworiented;
1781   c->nonew       = a->nonew;
1782 
1783   if (a->diag) {
1784     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1785     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1786     for (i=0; i<mbs; i++) {
1787       c->diag[i] = a->diag[i];
1788     }
1789   } else c->diag        = 0;
1790   c->nz               = a->nz;
1791   c->maxnz            = a->maxnz;
1792   c->solve_work         = 0;
1793   c->mult_work          = 0;
1794   *B = C;
1795   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1801 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1802 {
1803   Mat_SeqSBAIJ *a;
1804   Mat          B;
1805   PetscErrorCode ierr;
1806   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1807   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1808   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1809   int          *masked,nmask,tmp,bs2,ishift;
1810   PetscScalar  *aa;
1811   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1812 
1813   PetscFunctionBegin;
1814   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1815   bs2  = bs*bs;
1816 
1817   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1818   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1819   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1820   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1821   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1822   M = header[1]; N = header[2]; nz = header[3];
1823 
1824   if (header[3] < 0) {
1825     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1826   }
1827 
1828   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1829 
1830   /*
1831      This code adds extra rows to make sure the number of rows is
1832     divisible by the blocksize
1833   */
1834   mbs        = M/bs;
1835   extra_rows = bs - M + bs*(mbs);
1836   if (extra_rows == bs) extra_rows = 0;
1837   else                  mbs++;
1838   if (extra_rows) {
1839     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1840   }
1841 
1842   /* read in row lengths */
1843   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1844   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1845   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1846 
1847   /* read in column indices */
1848   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1849   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1850   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1851 
1852   /* loop over row lengths determining block row lengths */
1853   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1854   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1855   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1856   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1857   masked   = mask + mbs;
1858   rowcount = 0; nzcount = 0;
1859   for (i=0; i<mbs; i++) {
1860     nmask = 0;
1861     for (j=0; j<bs; j++) {
1862       kmax = rowlengths[rowcount];
1863       for (k=0; k<kmax; k++) {
1864         tmp = jj[nzcount++]/bs;   /* block col. index */
1865         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1866       }
1867       rowcount++;
1868     }
1869     s_browlengths[i] += nmask;
1870 
1871     /* zero out the mask elements we set */
1872     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1873   }
1874 
1875   /* create our matrix */
1876   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1877   ierr = MatSetType(B,type);CHKERRQ(ierr);
1878   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1879   a = (Mat_SeqSBAIJ*)B->data;
1880 
1881   /* set matrix "i" values */
1882   a->i[0] = 0;
1883   for (i=1; i<= mbs; i++) {
1884     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1885     a->ilen[i-1] = s_browlengths[i-1];
1886   }
1887   a->nz = a->i[mbs];
1888 
1889   /* read in nonzero values */
1890   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1891   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1892   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1893 
1894   /* set "a" and "j" values into matrix */
1895   nzcount = 0; jcount = 0;
1896   for (i=0; i<mbs; i++) {
1897     nzcountb = nzcount;
1898     nmask    = 0;
1899     for (j=0; j<bs; j++) {
1900       kmax = rowlengths[i*bs+j];
1901       for (k=0; k<kmax; k++) {
1902         tmp = jj[nzcount++]/bs; /* block col. index */
1903         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1904       }
1905     }
1906     /* sort the masked values */
1907     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1908 
1909     /* set "j" values into matrix */
1910     maskcount = 1;
1911     for (j=0; j<nmask; j++) {
1912       a->j[jcount++]  = masked[j];
1913       mask[masked[j]] = maskcount++;
1914     }
1915 
1916     /* set "a" values into matrix */
1917     ishift = bs2*a->i[i];
1918     for (j=0; j<bs; j++) {
1919       kmax = rowlengths[i*bs+j];
1920       for (k=0; k<kmax; k++) {
1921         tmp       = jj[nzcountb]/bs ; /* block col. index */
1922         if (tmp >= i){
1923           block     = mask[tmp] - 1;
1924           point     = jj[nzcountb] - bs*tmp;
1925           idx       = ishift + bs2*block + j + bs*point;
1926           a->a[idx] = aa[nzcountb];
1927         }
1928         nzcountb++;
1929       }
1930     }
1931     /* zero out the mask elements we set */
1932     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1933   }
1934   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1935 
1936   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1937   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1938   ierr = PetscFree(aa);CHKERRQ(ierr);
1939   ierr = PetscFree(jj);CHKERRQ(ierr);
1940   ierr = PetscFree(mask);CHKERRQ(ierr);
1941 
1942   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1944   ierr = MatView_Private(B);CHKERRQ(ierr);
1945   *A = B;
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1951 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1952 {
1953   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1954   MatScalar      *aa=a->a,*v,*v1;
1955   PetscScalar    *x,*b,*t,sum,d;
1956   PetscErrorCode ierr;
1957   PetscInt       m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
1958   PetscInt       nz,nz1,*vj,*vj1,i;
1959 
1960   PetscFunctionBegin;
1961   its = its*lits;
1962   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1963 
1964   if (bs > 1)
1965     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1966 
1967   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1968   if (xx != bb) {
1969     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1970   } else {
1971     b = x;
1972   }
1973 
1974   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1975 
1976   if (flag & SOR_ZERO_INITIAL_GUESS) {
1977     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1978       for (i=0; i<m; i++)
1979         t[i] = b[i];
1980 
1981       for (i=0; i<m; i++){
1982         d  = *(aa + ai[i]);  /* diag[i] */
1983         v  = aa + ai[i] + 1;
1984         vj = aj + ai[i] + 1;
1985         nz = ai[i+1] - ai[i] - 1;
1986         x[i] = omega*t[i]/d;
1987         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1988         PetscLogFlops(2*nz-1);
1989       }
1990     }
1991 
1992     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1993       for (i=0; i<m; i++)
1994         t[i] = b[i];
1995 
1996       for (i=0; i<m-1; i++){  /* update rhs */
1997         v  = aa + ai[i] + 1;
1998         vj = aj + ai[i] + 1;
1999         nz = ai[i+1] - ai[i] - 1;
2000         while (nz--) t[*vj++] -= x[i]*(*v++);
2001         PetscLogFlops(2*nz-1);
2002       }
2003       for (i=m-1; i>=0; i--){
2004         d  = *(aa + ai[i]);
2005         v  = aa + ai[i] + 1;
2006         vj = aj + ai[i] + 1;
2007         nz = ai[i+1] - ai[i] - 1;
2008         sum = t[i];
2009         while (nz--) sum -= x[*vj++]*(*v++);
2010         PetscLogFlops(2*nz-1);
2011         x[i] =   (1-omega)*x[i] + omega*sum/d;
2012       }
2013     }
2014     its--;
2015   }
2016 
2017   while (its--) {
2018     /*
2019        forward sweep:
2020        for i=0,...,m-1:
2021          sum[i] = (b[i] - U(i,:)x )/d[i];
2022          x[i]   = (1-omega)x[i] + omega*sum[i];
2023          b      = b - x[i]*U^T(i,:);
2024 
2025     */
2026     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2027       for (i=0; i<m; i++)
2028         t[i] = b[i];
2029 
2030       for (i=0; i<m; i++){
2031         d  = *(aa + ai[i]);  /* diag[i] */
2032         v  = aa + ai[i] + 1; v1=v;
2033         vj = aj + ai[i] + 1; vj1=vj;
2034         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2035         sum = t[i];
2036         while (nz1--) sum -= (*v1++)*x[*vj1++];
2037         x[i] = (1-omega)*x[i] + omega*sum/d;
2038         while (nz--) t[*vj++] -= x[i]*(*v++);
2039         PetscLogFlops(4*nz-2);
2040       }
2041     }
2042 
2043   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2044       /*
2045        backward sweep:
2046        b = b - x[i]*U^T(i,:), i=0,...,n-2
2047        for i=m-1,...,0:
2048          sum[i] = (b[i] - U(i,:)x )/d[i];
2049          x[i]   = (1-omega)x[i] + omega*sum[i];
2050       */
2051       for (i=0; i<m; i++)
2052         t[i] = b[i];
2053 
2054       for (i=0; i<m-1; i++){  /* update rhs */
2055         v  = aa + ai[i] + 1;
2056         vj = aj + ai[i] + 1;
2057         nz = ai[i+1] - ai[i] - 1;
2058         while (nz--) t[*vj++] -= x[i]*(*v++);
2059         PetscLogFlops(2*nz-1);
2060       }
2061       for (i=m-1; i>=0; i--){
2062         d  = *(aa + ai[i]);
2063         v  = aa + ai[i] + 1;
2064         vj = aj + ai[i] + 1;
2065         nz = ai[i+1] - ai[i] - 1;
2066         sum = t[i];
2067         while (nz--) sum -= x[*vj++]*(*v++);
2068         PetscLogFlops(2*nz-1);
2069         x[i] =   (1-omega)*x[i] + omega*sum/d;
2070       }
2071     }
2072   }
2073 
2074   ierr = PetscFree(t);CHKERRQ(ierr);
2075   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2076   if (bb != xx) {
2077     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2078   }
2079 
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 
2084 
2085 
2086 
2087 
2088