xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision c71e6ed79470f80f46156e2e6d79aab3dd836ca2)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   PetscInt       i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     PetscInt nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   PetscInt     i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     PetscInt nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
93 {
94   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
99   ierr = PetscFree(a->a);CHKERRQ(ierr);
100   if (!a->singlemalloc) {
101     ierr = PetscFree(a->i);CHKERRQ(ierr);
102     ierr = PetscFree(a->j);CHKERRQ(ierr);
103   }
104   if (a->row) {
105     ierr = ISDestroy(a->row);CHKERRQ(ierr);
106   }
107   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
108   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
109   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
110   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
111   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
112   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
113   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
114   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
115   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
116 
117   if (a->inew){
118     ierr = PetscFree(a->inew);CHKERRQ(ierr);
119     a->inew = 0;
120   }
121   ierr = PetscFree(a);CHKERRQ(ierr);
122 
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
125   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
126   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
127   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
128   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
135 {
136   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
137 
138   PetscFunctionBegin;
139   switch (op) {
140   case MAT_ROW_ORIENTED:
141     a->roworiented    = PETSC_TRUE;
142     break;
143   case MAT_COLUMN_ORIENTED:
144     a->roworiented    = PETSC_FALSE;
145     break;
146   case MAT_COLUMNS_SORTED:
147     a->sorted         = PETSC_TRUE;
148     break;
149   case MAT_COLUMNS_UNSORTED:
150     a->sorted         = PETSC_FALSE;
151     break;
152   case MAT_KEEP_ZEROED_ROWS:
153     a->keepzeroedrows = PETSC_TRUE;
154     break;
155   case MAT_NO_NEW_NONZERO_LOCATIONS:
156     a->nonew          = 1;
157     break;
158   case MAT_NEW_NONZERO_LOCATION_ERR:
159     a->nonew          = -1;
160     break;
161   case MAT_NEW_NONZERO_ALLOCATION_ERR:
162     a->nonew          = -2;
163     break;
164   case MAT_YES_NEW_NONZERO_LOCATIONS:
165     a->nonew          = 0;
166     break;
167   case MAT_ROWS_SORTED:
168   case MAT_ROWS_UNSORTED:
169   case MAT_YES_NEW_DIAGONALS:
170   case MAT_IGNORE_OFF_PROC_ENTRIES:
171   case MAT_USE_HASH_TABLE:
172     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
173     break;
174   case MAT_NO_NEW_DIAGONALS:
175     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
176   case MAT_NOT_SYMMETRIC:
177   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
178   case MAT_HERMITIAN:
179     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
180   case MAT_SYMMETRIC:
181   case MAT_STRUCTURALLY_SYMMETRIC:
182   case MAT_NOT_HERMITIAN:
183   case MAT_SYMMETRY_ETERNAL:
184   case MAT_NOT_SYMMETRY_ETERNAL:
185     break;
186   default:
187     SETERRQ(PETSC_ERR_SUP,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
195 {
196   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
197   PetscErrorCode ierr;
198   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
199   MatScalar      *aa,*aa_i;
200   PetscScalar    *v_i;
201 
202   PetscFunctionBegin;
203   bs  = A->bs;
204   ai  = a->i;
205   aj  = a->j;
206   aa  = a->a;
207   bs2 = a->bs2;
208 
209   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
210 
211   bn  = row/bs;   /* Block number */
212   bp  = row % bs; /* Block position */
213   M   = ai[bn+1] - ai[bn];
214   *ncols = bs*M;
215 
216   if (v) {
217     *v = 0;
218     if (*ncols) {
219       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
220       for (i=0; i<M; i++) { /* for each block in the block row */
221         v_i  = *v + i*bs;
222         aa_i = aa + bs2*(ai[bn] + i);
223         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
224       }
225     }
226   }
227 
228   if (cols) {
229     *cols = 0;
230     if (*ncols) {
231       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
232       for (i=0; i<M; i++) { /* for each block in the block row */
233         cols_i = *cols + i*bs;
234         itmp  = bs*aj[ai[bn] + i];
235         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
236       }
237     }
238   }
239 
240   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
241   /* this segment is currently removed, so only entries in the upper triangle are obtained */
242 #ifdef column_search
243   v_i    = *v    + M*bs;
244   cols_i = *cols + M*bs;
245   for (i=0; i<bn; i++){ /* for each block row */
246     M = ai[i+1] - ai[i];
247     for (j=0; j<M; j++){
248       itmp = aj[ai[i] + j];    /* block column value */
249       if (itmp == bn){
250         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
251         for (k=0; k<bs; k++) {
252           *cols_i++ = i*bs+k;
253           *v_i++    = aa_i[k];
254         }
255         *ncols += bs;
256         break;
257       }
258     }
259   }
260 #endif
261 
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNCT__
266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
268 {
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
273   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
280 {
281   PetscErrorCode ierr;
282   PetscFunctionBegin;
283   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
290 {
291   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
292   PetscErrorCode    ierr;
293   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
294   char              *name;
295   PetscViewerFormat format;
296 
297   PetscFunctionBegin;
298   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
299   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
300   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
301     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
302   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
303     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
304   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
305     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
306     for (i=0; i<a->mbs; i++) {
307       for (j=0; j<bs; j++) {
308         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
309         for (k=a->i[i]; k<a->i[i+1]; k++) {
310           for (l=0; l<bs; l++) {
311 #if defined(PETSC_USE_COMPLEX)
312             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
313               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
314                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
315             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
316               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
317                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
318             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
320             }
321 #else
322             if (a->a[bs2*k + l*bs + j] != 0.0) {
323               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
324             }
325 #endif
326           }
327         }
328         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
329       }
330     }
331     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
332   } else {
333     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
334     for (i=0; i<a->mbs; i++) {
335       for (j=0; j<bs; j++) {
336         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
337         for (k=a->i[i]; k<a->i[i+1]; k++) {
338           for (l=0; l<bs; l++) {
339 #if defined(PETSC_USE_COMPLEX)
340             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
341               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
342                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
343             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
344               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
345                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             } else {
347               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
348             }
349 #else
350             ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
351 #endif
352           }
353         }
354         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
355       }
356     }
357     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
358   }
359   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
366 {
367   Mat            A = (Mat) Aa;
368   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
369   PetscErrorCode ierr;
370   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
371   PetscMPIInt    rank;
372   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
373   MatScalar      *aa;
374   MPI_Comm       comm;
375   PetscViewer    viewer;
376 
377   PetscFunctionBegin;
378   /*
379       This is nasty. If this is called from an originally parallel matrix
380    then all processes call this,but only the first has the matrix so the
381    rest should return immediately.
382   */
383   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
384   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
385   if (rank) PetscFunctionReturn(0);
386 
387   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
388 
389   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
390   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
391 
392   /* loop over matrix elements drawing boxes */
393   color = PETSC_DRAW_BLUE;
394   for (i=0,row=0; i<mbs; i++,row+=bs) {
395     for (j=a->i[i]; j<a->i[i+1]; j++) {
396       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
397       x_l = a->j[j]*bs; x_r = x_l + 1.0;
398       aa = a->a + j*bs2;
399       for (k=0; k<bs; k++) {
400         for (l=0; l<bs; l++) {
401           if (PetscRealPart(*aa++) >=  0.) continue;
402           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
403         }
404       }
405     }
406   }
407   color = PETSC_DRAW_CYAN;
408   for (i=0,row=0; i<mbs; i++,row+=bs) {
409     for (j=a->i[i]; j<a->i[i+1]; j++) {
410       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
411       x_l = a->j[j]*bs; x_r = x_l + 1.0;
412       aa = a->a + j*bs2;
413       for (k=0; k<bs; k++) {
414         for (l=0; l<bs; l++) {
415           if (PetscRealPart(*aa++) != 0.) continue;
416           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
417         }
418       }
419     }
420   }
421 
422   color = PETSC_DRAW_RED;
423   for (i=0,row=0; i<mbs; i++,row+=bs) {
424     for (j=a->i[i]; j<a->i[i+1]; j++) {
425       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
426       x_l = a->j[j]*bs; x_r = x_l + 1.0;
427       aa = a->a + j*bs2;
428       for (k=0; k<bs; k++) {
429         for (l=0; l<bs; l++) {
430           if (PetscRealPart(*aa++) <= 0.) continue;
431           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
442 {
443   PetscErrorCode ierr;
444   PetscReal      xl,yl,xr,yr,w,h;
445   PetscDraw      draw;
446   PetscTruth     isnull;
447 
448   PetscFunctionBegin;
449 
450   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
451   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
452 
453   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
454   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
455   xr += w;    yr += h;  xl = -w;     yl = -h;
456   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
457   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatView_SeqSBAIJ"
464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
465 {
466   PetscErrorCode ierr;
467   PetscTruth     iascii,isdraw;
468 
469   PetscFunctionBegin;
470   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
471   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
472   if (iascii){
473     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
474   } else if (isdraw) {
475     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
476   } else {
477     Mat B;
478     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
479     ierr = MatView(B,viewer);CHKERRQ(ierr);
480     ierr = MatDestroy(B);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
489 {
490   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
491   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
492   PetscInt     *ai = a->i,*ailen = a->ilen;
493   PetscInt     brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
494   MatScalar    *ap,*aa = a->a,zero = 0.0;
495 
496   PetscFunctionBegin;
497   for (k=0; k<m; k++) { /* loop over rows */
498     row  = im[k]; brow = row/bs;
499     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
500     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
501     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
502     nrow = ailen[brow];
503     for (l=0; l<n; l++) { /* loop over columns */
504       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
505       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
506       col  = in[l] ;
507       bcol = col/bs;
508       cidx = col%bs;
509       ridx = row%bs;
510       high = nrow;
511       low  = 0; /* assume unsorted */
512       while (high-low > 5) {
513         t = (low+high)/2;
514         if (rp[t] > bcol) high = t;
515         else             low  = t;
516       }
517       for (i=low; i<high; i++) {
518         if (rp[i] > bcol) break;
519         if (rp[i] == bcol) {
520           *v++ = ap[bs2*i+bs*cidx+ridx];
521           goto finished;
522         }
523       }
524       *v++ = zero;
525       finished:;
526     }
527   }
528   PetscFunctionReturn(0);
529 }
530 
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
535 {
536   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
537   PetscErrorCode  ierr;
538   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
539   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
540   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
541   PetscTruth      roworiented=a->roworiented;
542   const MatScalar *value = v;
543   MatScalar       *ap,*aa = a->a,*bap;
544 
545   PetscFunctionBegin;
546   if (roworiented) {
547     stepval = (n-1)*bs;
548   } else {
549     stepval = (m-1)*bs;
550   }
551   for (k=0; k<m; k++) { /* loop over added rows */
552     row  = im[k];
553     if (row < 0) continue;
554 #if defined(PETSC_USE_DEBUG)
555     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
556 #endif
557     rp   = aj + ai[row];
558     ap   = aa + bs2*ai[row];
559     rmax = imax[row];
560     nrow = ailen[row];
561     low  = 0;
562     high = nrow;
563     for (l=0; l<n; l++) { /* loop over added columns */
564       if (in[l] < 0) continue;
565       col = in[l];
566 #if defined(PETSC_USE_DEBUG)
567       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
568 #endif
569       if (col < row) continue; /* ignore lower triangular block */
570       if (roworiented) {
571         value = v + k*(stepval+bs)*bs + l*bs;
572       } else {
573         value = v + l*(stepval+bs)*bs + k*bs;
574       }
575       if (col < lastcol) low = 0; else high = nrow;
576       lastcol = col;
577       while (high-low > 7) {
578         t = (low+high)/2;
579         if (rp[t] > col) high = t;
580         else             low  = t;
581       }
582       for (i=low; i<high; i++) {
583         if (rp[i] > col) break;
584         if (rp[i] == col) {
585           bap  = ap +  bs2*i;
586           if (roworiented) {
587             if (is == ADD_VALUES) {
588               for (ii=0; ii<bs; ii++,value+=stepval) {
589                 for (jj=ii; jj<bs2; jj+=bs) {
590                   bap[jj] += *value++;
591                 }
592               }
593             } else {
594               for (ii=0; ii<bs; ii++,value+=stepval) {
595                 for (jj=ii; jj<bs2; jj+=bs) {
596                   bap[jj] = *value++;
597                 }
598               }
599             }
600           } else {
601             if (is == ADD_VALUES) {
602               for (ii=0; ii<bs; ii++,value+=stepval) {
603                 for (jj=0; jj<bs; jj++) {
604                   *bap++ += *value++;
605                 }
606               }
607             } else {
608               for (ii=0; ii<bs; ii++,value+=stepval) {
609                 for (jj=0; jj<bs; jj++) {
610                   *bap++  = *value++;
611                 }
612               }
613             }
614           }
615           goto noinsert2;
616         }
617       }
618       if (nonew == 1) goto noinsert2;
619       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
620       if (nrow >= rmax) {
621         /* there is no extra room in row, therefore enlarge */
622         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
623         MatScalar *new_a;
624 
625         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
626 
627         /* malloc new storage space */
628         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
629 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
630         new_j   = (PetscInt*)(new_a + bs2*new_nz);
631         new_i   = new_j + new_nz;
632 
633         /* copy over old data into new slots */
634         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
635         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
636         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
637         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
638         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
639         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
640         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
641         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
642         /* free up old matrix storage */
643         ierr = PetscFree(a->a);CHKERRQ(ierr);
644         if (!a->singlemalloc) {
645           ierr = PetscFree(a->i);CHKERRQ(ierr);
646           ierr = PetscFree(a->j);CHKERRQ(ierr);
647         }
648         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
649         a->singlemalloc = PETSC_TRUE;
650 
651         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
652         rmax = imax[row] = imax[row] + CHUNKSIZE;
653         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
654         a->maxnz += bs2*CHUNKSIZE;
655         a->reallocs++;
656         a->nz++;
657       }
658       N = nrow++ - 1;
659       /* shift up all the later entries in this row */
660       for (ii=N; ii>=i; ii--) {
661         rp[ii+1] = rp[ii];
662         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
663       }
664       if (N >= i) {
665         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
666       }
667       rp[i] = col;
668       bap   = ap +  bs2*i;
669       if (roworiented) {
670         for (ii=0; ii<bs; ii++,value+=stepval) {
671           for (jj=ii; jj<bs2; jj+=bs) {
672             bap[jj] = *value++;
673           }
674         }
675       } else {
676         for (ii=0; ii<bs; ii++,value+=stepval) {
677           for (jj=0; jj<bs; jj++) {
678             *bap++  = *value++;
679           }
680         }
681       }
682       noinsert2:;
683       low = i;
684     }
685     ailen[row] = nrow;
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
692 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
693 {
694   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
695   PetscErrorCode ierr;
696   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
697   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
698   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
699   MatScalar      *aa = a->a,*ap;
700 
701   PetscFunctionBegin;
702   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
703 
704   if (m) rmax = ailen[0];
705   for (i=1; i<mbs; i++) {
706     /* move each row back by the amount of empty slots (fshift) before it*/
707     fshift += imax[i-1] - ailen[i-1];
708     rmax   = PetscMax(rmax,ailen[i]);
709     if (fshift) {
710       ip = aj + ai[i]; ap = aa + bs2*ai[i];
711       N = ailen[i];
712       for (j=0; j<N; j++) {
713         ip[j-fshift] = ip[j];
714         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
715       }
716     }
717     ai[i] = ai[i-1] + ailen[i-1];
718   }
719   if (mbs) {
720     fshift += imax[mbs-1] - ailen[mbs-1];
721     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
722   }
723   /* reset ilen and imax for each row */
724   for (i=0; i<mbs; i++) {
725     ailen[i] = imax[i] = ai[i+1] - ai[i];
726   }
727   a->nz = ai[mbs];
728 
729   /* diagonals may have moved, reset it */
730   if (a->diag) {
731     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
732   }
733   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
734            m,A->m,A->bs,fshift*bs2,a->nz*bs2);
735   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",
736            a->reallocs);
737   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
738   a->reallocs          = 0;
739   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
740 
741   PetscFunctionReturn(0);
742 }
743 
744 /*
745    This function returns an array of flags which indicate the locations of contiguous
746    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
747    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
748    Assume: sizes should be long enough to hold all the values.
749 */
750 #undef __FUNCT__
751 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
752 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
753 {
754   PetscInt   i,j,k,row;
755   PetscTruth flg;
756 
757   PetscFunctionBegin;
758   for (i=0,j=0; i<n; j++) {
759     row = idx[i];
760     if (row%bs!=0) { /* Not the begining of a block */
761       sizes[j] = 1;
762       i++;
763     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
764       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
765       i++;
766     } else { /* Begining of the block, so check if the complete block exists */
767       flg = PETSC_TRUE;
768       for (k=1; k<bs; k++) {
769         if (row+k != idx[i+k]) { /* break in the block */
770           flg = PETSC_FALSE;
771           break;
772         }
773       }
774       if (flg) { /* No break in the bs */
775         sizes[j] = bs;
776         i+= bs;
777       } else {
778         sizes[j] = 1;
779         i++;
780       }
781     }
782   }
783   *bs_max = j;
784   PetscFunctionReturn(0);
785 }
786 
787 
788 /* Only add/insert a(i,j) with i<=j (blocks).
789    Any a(i,j) with i>j input by user is ingored.
790 */
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
794 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
795 {
796   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
797   PetscErrorCode ierr;
798   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
799   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
800   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
801   PetscInt       ridx,cidx,bs2=a->bs2;
802   MatScalar      *ap,value,*aa=a->a,*bap;
803 
804   PetscFunctionBegin;
805 
806   for (k=0; k<m; k++) { /* loop over added rows */
807     row  = im[k];       /* row number */
808     brow = row/bs;      /* block row number */
809     if (row < 0) continue;
810 #if defined(PETSC_USE_DEBUG)
811     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
812 #endif
813     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
814     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
815     rmax = imax[brow];  /* maximum space allocated for this row */
816     nrow = ailen[brow]; /* actual length of this row */
817     low  = 0;
818     high = nrow;
819 
820     for (l=0; l<n; l++) { /* loop over added columns */
821       if (in[l] < 0) continue;
822 #if defined(PETSC_USE_DEBUG)
823       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
824 #endif
825       col = in[l];
826       bcol = col/bs;              /* block col number */
827 
828       if (brow <= bcol){
829         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
830         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
831           /* element value a(k,l) */
832           if (roworiented) {
833             value = v[l + k*n];
834           } else {
835             value = v[k + l*m];
836           }
837 
838           /* move pointer bap to a(k,l) quickly and add/insert value */
839           if (col < lastcol) low = 0; else high = nrow;
840           lastcol = col;
841           while (high-low > 7) {
842             t = (low+high)/2;
843             if (rp[t] > bcol) high = t;
844             else              low  = t;
845           }
846           for (i=low; i<high; i++) {
847             /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */
848             if (rp[i] > bcol) break;
849             if (rp[i] == bcol) {
850               bap  = ap +  bs2*i + bs*cidx + ridx;
851               if (is == ADD_VALUES) *bap += value;
852               else                  *bap  = value;
853               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
854               if (brow == bcol && ridx < cidx){
855                 bap  = ap +  bs2*i + bs*ridx + cidx;
856                 if (is == ADD_VALUES) *bap += value;
857                 else                  *bap  = value;
858               }
859               goto noinsert1;
860             }
861           }
862 
863       if (nonew == 1) goto noinsert1;
864       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
865       if (nrow >= rmax) {
866         /* there is no extra room in row, therefore enlarge */
867         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
868         MatScalar *new_a;
869 
870         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
871 
872         /* Malloc new storage space */
873         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
874         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
875         new_j = (PetscInt*)(new_a + bs2*new_nz);
876         new_i = new_j + new_nz;
877 
878         /* copy over old data into new slots */
879         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
880         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
881         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
882         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
883         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
884         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
885         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
886         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
887         /* free up old matrix storage */
888         ierr = PetscFree(a->a);CHKERRQ(ierr);
889         if (!a->singlemalloc) {
890           ierr = PetscFree(a->i);CHKERRQ(ierr);
891           ierr = PetscFree(a->j);CHKERRQ(ierr);
892         }
893         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
894         a->singlemalloc = PETSC_TRUE;
895 
896         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
897         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
898         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
899         a->maxnz += bs2*CHUNKSIZE;
900         a->reallocs++;
901         a->nz++;
902       }
903 
904       N = nrow++ - 1;
905       /* shift up all the later entries in this row */
906       for (ii=N; ii>=i; ii--) {
907         rp[ii+1] = rp[ii];
908         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
909       }
910       if (N>=i) {
911         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
912       }
913       rp[i]                      = bcol;
914       ap[bs2*i + bs*cidx + ridx] = value;
915       noinsert1:;
916       low = i;
917       /* } */
918         }
919       } /* end of if .. if..  */
920     }                     /* end of loop over added columns */
921     ailen[brow] = nrow;
922   }                       /* end of loop over added rows */
923 
924   PetscFunctionReturn(0);
925 }
926 
927 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
928 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
929 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
930 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
931 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
932 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
933 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
934 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
935 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
936 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
937 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
938 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
939 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
940 
941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
949 
950 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
951 
952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
960 
961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*);
962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*);
963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*);
964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*);
965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*);
966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*);
967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*);
968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*);
969 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
970 
971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
979 
980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
988 
989 #ifdef HAVE_MatICCFactor
990 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
991 #undef __FUNCT__
992 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
993 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
994 {
995   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
996   Mat            outA;
997   PetscErrorCode ierr;
998   PetscTruth     row_identity,col_identity;
999 
1000   PetscFunctionBegin;
1001   outA          = inA;
1002   inA->factor   = FACTOR_CHOLESKY;
1003 
1004   if (!a->diag) {
1005     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1006   }
1007   /*
1008       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1009       for ILU(0) factorization with natural ordering
1010   */
1011   switch (a->bs) {
1012   case 1:
1013     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1014     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1015     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1016     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1017   case 2:
1018     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1019     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1020     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1021     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1022     break;
1023   case 3:
1024     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1025     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1026     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1027     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1028     break;
1029   case 4:
1030     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1031     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1032     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1033     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1034     break;
1035   case 5:
1036     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1037     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1038     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1039     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1040     break;
1041   case 6:
1042     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1043     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1044     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1045     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1046     break;
1047   case 7:
1048     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1049     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1050     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1051     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1052     break;
1053   default:
1054     a->row        = row;
1055     a->icol       = col;
1056     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1057     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1058 
1059     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1060     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1061     ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1062 
1063     if (!a->solve_work) {
1064       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1065       ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067   }
1068 
1069   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1070 
1071   PetscFunctionReturn(0);
1072 }
1073 #endif
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1077 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1078 {
1079   static PetscTruth called = PETSC_FALSE;
1080   MPI_Comm          comm = A->comm;
1081   PetscErrorCode    ierr;
1082 
1083   PetscFunctionBegin;
1084   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1085   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1086   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 EXTERN_C_BEGIN
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1093 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1094 {
1095   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1096   PetscInt     i,nz,n;
1097 
1098   PetscFunctionBegin;
1099   nz = baij->maxnz;
1100   n  = mat->n;
1101   for (i=0; i<nz; i++) {
1102     baij->j[i] = indices[i];
1103   }
1104   baij->nz = nz;
1105   for (i=0; i<n; i++) {
1106     baij->ilen[i] = baij->imax[i];
1107   }
1108 
1109   PetscFunctionReturn(0);
1110 }
1111 EXTERN_C_END
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1115 /*@
1116     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1117        in the matrix.
1118 
1119   Input Parameters:
1120 +  mat     - the SeqSBAIJ matrix
1121 -  indices - the column indices
1122 
1123   Level: advanced
1124 
1125   Notes:
1126     This can be called if you have precomputed the nonzero structure of the
1127   matrix and want to provide it to the matrix object to improve the performance
1128   of the MatSetValues() operation.
1129 
1130     You MUST have set the correct numbers of nonzeros per row in the call to
1131   MatCreateSeqSBAIJ().
1132 
1133     MUST be called before any calls to MatSetValues()
1134 
1135 .seealso: MatCreateSeqSBAIJ
1136 @*/
1137 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1138 {
1139   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1140 
1141   PetscFunctionBegin;
1142   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1143   PetscValidPointer(indices,2);
1144   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1145   if (f) {
1146     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1147   } else {
1148     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1155 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1156 {
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1166 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1167 {
1168   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1169   PetscFunctionBegin;
1170   *array = a->a;
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1176 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1177 {
1178   PetscFunctionBegin;
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #include "petscblaslapack.h"
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1185 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1186 {
1187   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1188   PetscErrorCode ierr;
1189   PetscInt       i,bs=Y->bs,bs2,j;
1190   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1191 
1192   PetscFunctionBegin;
1193   if (str == SAME_NONZERO_PATTERN) {
1194     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1195   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1196     if (y->xtoy && y->XtoY != X) {
1197       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1198       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1199     }
1200     if (!y->xtoy) { /* get xtoy */
1201       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1202       y->XtoY = X;
1203     }
1204     bs2 = bs*bs;
1205     for (i=0; i<x->nz; i++) {
1206       j = 0;
1207       while (j < bs2){
1208         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1209         j++;
1210       }
1211     }
1212     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1213   } else {
1214     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1221 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1222 {
1223   PetscFunctionBegin;
1224   *flg = PETSC_TRUE;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1230 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1231 {
1232   PetscFunctionBegin;
1233   *flg = PETSC_TRUE;
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1239 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1240 {
1241   PetscFunctionBegin;
1242   *flg = PETSC_FALSE;
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 /* -------------------------------------------------------------------*/
1247 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1248        MatGetRow_SeqSBAIJ,
1249        MatRestoreRow_SeqSBAIJ,
1250        MatMult_SeqSBAIJ_N,
1251 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1252        MatMult_SeqSBAIJ_N,
1253        MatMultAdd_SeqSBAIJ_N,
1254        MatSolve_SeqSBAIJ_N,
1255        0,
1256        0,
1257 /*10*/ 0,
1258        0,
1259        MatCholeskyFactor_SeqSBAIJ,
1260        MatRelax_SeqSBAIJ,
1261        MatTranspose_SeqSBAIJ,
1262 /*15*/ MatGetInfo_SeqSBAIJ,
1263        MatEqual_SeqSBAIJ,
1264        MatGetDiagonal_SeqSBAIJ,
1265        MatDiagonalScale_SeqSBAIJ,
1266        MatNorm_SeqSBAIJ,
1267 /*20*/ 0,
1268        MatAssemblyEnd_SeqSBAIJ,
1269        0,
1270        MatSetOption_SeqSBAIJ,
1271        MatZeroEntries_SeqSBAIJ,
1272 /*25*/ 0,
1273        0,
1274        0,
1275        MatCholeskyFactorSymbolic_SeqSBAIJ,
1276        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1277 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1278        0,
1279        MatICCFactorSymbolic_SeqSBAIJ,
1280        MatGetArray_SeqSBAIJ,
1281        MatRestoreArray_SeqSBAIJ,
1282 /*35*/ MatDuplicate_SeqSBAIJ,
1283        0,
1284        0,
1285        0,
1286        0,
1287 /*40*/ MatAXPY_SeqSBAIJ,
1288        MatGetSubMatrices_SeqSBAIJ,
1289        MatIncreaseOverlap_SeqSBAIJ,
1290        MatGetValues_SeqSBAIJ,
1291        0,
1292 /*45*/ MatPrintHelp_SeqSBAIJ,
1293        MatScale_SeqSBAIJ,
1294        0,
1295        0,
1296        0,
1297 /*50*/ 0,
1298        MatGetRowIJ_SeqSBAIJ,
1299        MatRestoreRowIJ_SeqSBAIJ,
1300        0,
1301        0,
1302 /*55*/ 0,
1303        0,
1304        0,
1305        0,
1306        MatSetValuesBlocked_SeqSBAIJ,
1307 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1308        0,
1309        0,
1310        MatGetPetscMaps_Petsc,
1311        0,
1312 /*65*/ 0,
1313        0,
1314        0,
1315        0,
1316        0,
1317 /*70*/ MatGetRowMax_SeqSBAIJ,
1318        0,
1319        0,
1320        0,
1321        0,
1322 /*75*/ 0,
1323        0,
1324        0,
1325        0,
1326        0,
1327 /*80*/ 0,
1328        0,
1329        0,
1330 #if !defined(PETSC_USE_COMPLEX)
1331        MatGetInertia_SeqSBAIJ,
1332 #else
1333        0,
1334 #endif
1335        MatLoad_SeqSBAIJ,
1336 /*85*/ MatIsSymmetric_SeqSBAIJ,
1337        MatIsHermitian_SeqSBAIJ,
1338        MatIsStructurallySymmetric_SeqSBAIJ,
1339        0,
1340        0,
1341 /*90*/ 0,
1342        0,
1343        0,
1344        0,
1345        0,
1346 /*95*/ 0,
1347        0,
1348        0,
1349        0};
1350 
1351 EXTERN_C_BEGIN
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1354 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1355 {
1356   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1357   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   if (aij->nonew != 1) {
1362     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1363   }
1364 
1365   /* allocate space for values if not already there */
1366   if (!aij->saved_values) {
1367     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1368   }
1369 
1370   /* copy values over */
1371   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 EXTERN_C_END
1375 
1376 EXTERN_C_BEGIN
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1379 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1380 {
1381   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1382   PetscErrorCode ierr;
1383   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1384 
1385   PetscFunctionBegin;
1386   if (aij->nonew != 1) {
1387     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1388   }
1389   if (!aij->saved_values) {
1390     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1391   }
1392 
1393   /* copy values over */
1394   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 EXTERN_C_END
1398 
1399 EXTERN_C_BEGIN
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1402 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1403 {
1404   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1405   PetscErrorCode ierr;
1406   PetscInt       i,len,mbs,bs2;
1407   PetscTruth     flg;
1408 
1409   PetscFunctionBegin;
1410   B->preallocated = PETSC_TRUE;
1411   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1412   mbs  = B->m/bs;
1413   bs2  = bs*bs;
1414 
1415   if (mbs*bs != B->m) {
1416     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1417   }
1418 
1419   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1420   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1421   if (nnz) {
1422     for (i=0; i<mbs; i++) {
1423       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1424       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1425     }
1426   }
1427 
1428   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1429   if (!flg) {
1430     switch (bs) {
1431     case 1:
1432       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1433       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1434       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1435       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1436       B->ops->mult            = MatMult_SeqSBAIJ_1;
1437       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1438       break;
1439     case 2:
1440       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1441       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1442       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_2;
1443       B->ops->mult            = MatMult_SeqSBAIJ_2;
1444       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1445       break;
1446     case 3:
1447       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1448       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1449       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_3;
1450       B->ops->mult            = MatMult_SeqSBAIJ_3;
1451       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1452       break;
1453     case 4:
1454       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1455       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1456       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_4;
1457       B->ops->mult            = MatMult_SeqSBAIJ_4;
1458       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1459       break;
1460     case 5:
1461       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1462       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1463       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_5;
1464       B->ops->mult            = MatMult_SeqSBAIJ_5;
1465       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1466       break;
1467     case 6:
1468       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1469       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1470       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_6;
1471       B->ops->mult            = MatMult_SeqSBAIJ_6;
1472       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1473       break;
1474     case 7:
1475       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1476       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1477       B->ops->solvetranspose  = MatSolve_SeqSBAIJ_7;
1478       B->ops->mult            = MatMult_SeqSBAIJ_7;
1479       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1480       break;
1481     }
1482   }
1483 
1484   b->mbs = mbs;
1485   b->nbs = mbs;
1486   ierr   = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
1487   if (!nnz) {
1488     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1489     else if (nz <= 0)        nz = 1;
1490     for (i=0; i<mbs; i++) {
1491       b->imax[i] = nz;
1492     }
1493     nz = nz*mbs; /* total nz */
1494   } else {
1495     nz = 0;
1496     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1497   }
1498   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1499 
1500   /* allocate the matrix space */
1501   len  = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
1502   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1503   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1504   b->j = (PetscInt*)(b->a + nz*bs2);
1505   ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1506   b->i = b->j + nz;
1507   b->singlemalloc = PETSC_TRUE;
1508 
1509   /* pointer to beginning of each row */
1510   b->i[0] = 0;
1511   for (i=1; i<mbs+1; i++) {
1512     b->i[i] = b->i[i-1] + b->imax[i-1];
1513   }
1514 
1515   /* b->ilen will count nonzeros in each block row so far. */
1516   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
1517   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1518   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1519 
1520   B->bs               = bs;
1521   b->bs2              = bs2;
1522   b->nz             = 0;
1523   b->maxnz          = nz*bs2;
1524 
1525   b->inew             = 0;
1526   b->jnew             = 0;
1527   b->anew             = 0;
1528   b->a2anew           = 0;
1529   b->permute          = PETSC_FALSE;
1530   PetscFunctionReturn(0);
1531 }
1532 EXTERN_C_END
1533 
1534 EXTERN_C_BEGIN
1535 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1536 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1537 EXTERN_C_END
1538 
1539 /*MC
1540    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1541    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1542 
1543    Options Database Keys:
1544 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1545 
1546   Level: beginner
1547 
1548 .seealso: MatCreateSeqSBAIJ
1549 M*/
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1554 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1555 {
1556   Mat_SeqSBAIJ   *b;
1557   PetscErrorCode ierr;
1558   PetscMPIInt    size;
1559 
1560   PetscFunctionBegin;
1561   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1562   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1563   B->m = B->M = PetscMax(B->m,B->M);
1564   B->n = B->N = PetscMax(B->n,B->N);
1565 
1566   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1567   B->data = (void*)b;
1568   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1569   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1570   B->ops->view        = MatView_SeqSBAIJ;
1571   B->factor           = 0;
1572   B->lupivotthreshold = 1.0;
1573   B->mapping          = 0;
1574   b->row              = 0;
1575   b->icol             = 0;
1576   b->reallocs         = 0;
1577   b->saved_values     = 0;
1578 
1579   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1580   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1581 
1582   b->sorted           = PETSC_FALSE;
1583   b->roworiented      = PETSC_TRUE;
1584   b->nonew            = 0;
1585   b->diag             = 0;
1586   b->solve_work       = 0;
1587   b->mult_work        = 0;
1588   B->spptr            = 0;
1589   b->keepzeroedrows   = PETSC_FALSE;
1590   b->xtoy             = 0;
1591   b->XtoY             = 0;
1592 
1593   b->inew             = 0;
1594   b->jnew             = 0;
1595   b->anew             = 0;
1596   b->a2anew           = 0;
1597   b->permute          = PETSC_FALSE;
1598 
1599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1600                                      "MatStoreValues_SeqSBAIJ",
1601                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1603                                      "MatRetrieveValues_SeqSBAIJ",
1604                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1606                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1607                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1609                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1610                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1612                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1613                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1615                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1616                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1617 
1618   B->symmetric                  = PETSC_TRUE;
1619   B->structurally_symmetric     = PETSC_TRUE;
1620   B->symmetric_set              = PETSC_TRUE;
1621   B->structurally_symmetric_set = PETSC_TRUE;
1622   PetscFunctionReturn(0);
1623 }
1624 EXTERN_C_END
1625 
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1628 /*@C
1629    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1630    compressed row) format.  For good matrix assembly performance the
1631    user should preallocate the matrix storage by setting the parameter nz
1632    (or the array nnz).  By setting these parameters accurately, performance
1633    during matrix assembly can be increased by more than a factor of 50.
1634 
1635    Collective on Mat
1636 
1637    Input Parameters:
1638 +  A - the symmetric matrix
1639 .  bs - size of block
1640 .  nz - number of block nonzeros per block row (same for all rows)
1641 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1642          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1643 
1644    Options Database Keys:
1645 .   -mat_no_unroll - uses code that does not unroll the loops in the
1646                      block calculations (much slower)
1647 .    -mat_block_size - size of the blocks to use
1648 
1649    Level: intermediate
1650 
1651    Notes:
1652    Specify the preallocated storage with either nz or nnz (not both).
1653    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1654    allocation.  For additional details, see the users manual chapter on
1655    matrices.
1656 
1657    If the nnz parameter is given then the nz parameter is ignored
1658 
1659 
1660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1661 @*/
1662 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1663 {
1664   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1668   if (f) {
1669     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatCreateSeqSBAIJ"
1676 /*@C
1677    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1678    compressed row) format.  For good matrix assembly performance the
1679    user should preallocate the matrix storage by setting the parameter nz
1680    (or the array nnz).  By setting these parameters accurately, performance
1681    during matrix assembly can be increased by more than a factor of 50.
1682 
1683    Collective on MPI_Comm
1684 
1685    Input Parameters:
1686 +  comm - MPI communicator, set to PETSC_COMM_SELF
1687 .  bs - size of block
1688 .  m - number of rows, or number of columns
1689 .  nz - number of block nonzeros per block row (same for all rows)
1690 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1691          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1692 
1693    Output Parameter:
1694 .  A - the symmetric matrix
1695 
1696    Options Database Keys:
1697 .   -mat_no_unroll - uses code that does not unroll the loops in the
1698                      block calculations (much slower)
1699 .    -mat_block_size - size of the blocks to use
1700 
1701    Level: intermediate
1702 
1703    Notes:
1704 
1705    Specify the preallocated storage with either nz or nnz (not both).
1706    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1707    allocation.  For additional details, see the users manual chapter on
1708    matrices.
1709 
1710    If the nnz parameter is given then the nz parameter is ignored
1711 
1712 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1713 @*/
1714 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1715 {
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1720   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1721   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1727 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1728 {
1729   Mat            C;
1730   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1731   PetscErrorCode ierr;
1732   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1733 
1734   PetscFunctionBegin;
1735   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1736 
1737   *B = 0;
1738   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1739   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1740   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1741   c    = (Mat_SeqSBAIJ*)C->data;
1742 
1743   C->preallocated   = PETSC_TRUE;
1744   C->factor         = A->factor;
1745   c->row            = 0;
1746   c->icol           = 0;
1747   c->saved_values   = 0;
1748   c->keepzeroedrows = a->keepzeroedrows;
1749   C->assembled      = PETSC_TRUE;
1750 
1751   C->M    = A->M;
1752   C->N    = A->N;
1753   C->bs   = A->bs;
1754   c->bs2  = a->bs2;
1755   c->mbs  = a->mbs;
1756   c->nbs  = a->nbs;
1757 
1758   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
1759   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
1760   for (i=0; i<mbs; i++) {
1761     c->imax[i] = a->imax[i];
1762     c->ilen[i] = a->ilen[i];
1763   }
1764 
1765   /* allocate the matrix space */
1766   c->singlemalloc = PETSC_TRUE;
1767   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
1768   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1769   c->j = (PetscInt*)(c->a + nz*bs2);
1770   c->i = c->j + nz;
1771   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1772   if (mbs > 0) {
1773     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1774     if (cpvalues == MAT_COPY_VALUES) {
1775       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1776     } else {
1777       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1778     }
1779   }
1780 
1781   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1782   c->sorted      = a->sorted;
1783   c->roworiented = a->roworiented;
1784   c->nonew       = a->nonew;
1785 
1786   if (a->diag) {
1787     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1788     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1789     for (i=0; i<mbs; i++) {
1790       c->diag[i] = a->diag[i];
1791     }
1792   } else c->diag        = 0;
1793   c->nz               = a->nz;
1794   c->maxnz            = a->maxnz;
1795   c->solve_work         = 0;
1796   c->mult_work          = 0;
1797   *B = C;
1798   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1804 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1805 {
1806   Mat_SeqSBAIJ   *a;
1807   Mat            B;
1808   PetscErrorCode ierr;
1809   int            fd;
1810   PetscMPIInt    size;
1811   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1812   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1813   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1814   PetscInt       *masked,nmask,tmp,bs2,ishift;
1815   PetscScalar    *aa;
1816   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1817 
1818   PetscFunctionBegin;
1819   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1820   bs2  = bs*bs;
1821 
1822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1823   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1824   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1825   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1826   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1827   M = header[1]; N = header[2]; nz = header[3];
1828 
1829   if (header[3] < 0) {
1830     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1831   }
1832 
1833   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1834 
1835   /*
1836      This code adds extra rows to make sure the number of rows is
1837     divisible by the blocksize
1838   */
1839   mbs        = M/bs;
1840   extra_rows = bs - M + bs*(mbs);
1841   if (extra_rows == bs) extra_rows = 0;
1842   else                  mbs++;
1843   if (extra_rows) {
1844     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1845   }
1846 
1847   /* read in row lengths */
1848   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1849   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1850   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1851 
1852   /* read in column indices */
1853   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1854   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1855   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1856 
1857   /* loop over row lengths determining block row lengths */
1858   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1859   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1860   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1861   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1862   masked   = mask + mbs;
1863   rowcount = 0; nzcount = 0;
1864   for (i=0; i<mbs; i++) {
1865     nmask = 0;
1866     for (j=0; j<bs; j++) {
1867       kmax = rowlengths[rowcount];
1868       for (k=0; k<kmax; k++) {
1869         tmp = jj[nzcount++]/bs;   /* block col. index */
1870         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1871       }
1872       rowcount++;
1873     }
1874     s_browlengths[i] += nmask;
1875 
1876     /* zero out the mask elements we set */
1877     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1878   }
1879 
1880   /* create our matrix */
1881   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1882   ierr = MatSetType(B,type);CHKERRQ(ierr);
1883   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1884   a = (Mat_SeqSBAIJ*)B->data;
1885 
1886   /* set matrix "i" values */
1887   a->i[0] = 0;
1888   for (i=1; i<= mbs; i++) {
1889     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1890     a->ilen[i-1] = s_browlengths[i-1];
1891   }
1892   a->nz = a->i[mbs];
1893 
1894   /* read in nonzero values */
1895   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1896   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1897   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1898 
1899   /* set "a" and "j" values into matrix */
1900   nzcount = 0; jcount = 0;
1901   for (i=0; i<mbs; i++) {
1902     nzcountb = nzcount;
1903     nmask    = 0;
1904     for (j=0; j<bs; j++) {
1905       kmax = rowlengths[i*bs+j];
1906       for (k=0; k<kmax; k++) {
1907         tmp = jj[nzcount++]/bs; /* block col. index */
1908         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1909       }
1910     }
1911     /* sort the masked values */
1912     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1913 
1914     /* set "j" values into matrix */
1915     maskcount = 1;
1916     for (j=0; j<nmask; j++) {
1917       a->j[jcount++]  = masked[j];
1918       mask[masked[j]] = maskcount++;
1919     }
1920 
1921     /* set "a" values into matrix */
1922     ishift = bs2*a->i[i];
1923     for (j=0; j<bs; j++) {
1924       kmax = rowlengths[i*bs+j];
1925       for (k=0; k<kmax; k++) {
1926         tmp       = jj[nzcountb]/bs ; /* block col. index */
1927         if (tmp >= i){
1928           block     = mask[tmp] - 1;
1929           point     = jj[nzcountb] - bs*tmp;
1930           idx       = ishift + bs2*block + j + bs*point;
1931           a->a[idx] = aa[nzcountb];
1932         }
1933         nzcountb++;
1934       }
1935     }
1936     /* zero out the mask elements we set */
1937     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1938   }
1939   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1940 
1941   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1942   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1943   ierr = PetscFree(aa);CHKERRQ(ierr);
1944   ierr = PetscFree(jj);CHKERRQ(ierr);
1945   ierr = PetscFree(mask);CHKERRQ(ierr);
1946 
1947   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1948   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1949   ierr = MatView_Private(B);CHKERRQ(ierr);
1950   *A = B;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1956 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1957 {
1958   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1959   MatScalar      *aa=a->a,*v,*v1;
1960   PetscScalar    *x,*b,*t,sum,d;
1961   PetscErrorCode ierr;
1962   PetscInt       m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
1963   PetscInt       nz,nz1,*vj,*vj1,i;
1964 
1965   PetscFunctionBegin;
1966   its = its*lits;
1967   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1968 
1969   if (bs > 1)
1970     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1971 
1972   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1973   if (xx != bb) {
1974     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1975   } else {
1976     b = x;
1977   }
1978 
1979   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1980 
1981   if (flag & SOR_ZERO_INITIAL_GUESS) {
1982     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1983       for (i=0; i<m; i++)
1984         t[i] = b[i];
1985 
1986       for (i=0; i<m; i++){
1987         d  = *(aa + ai[i]);  /* diag[i] */
1988         v  = aa + ai[i] + 1;
1989         vj = aj + ai[i] + 1;
1990         nz = ai[i+1] - ai[i] - 1;
1991         x[i] = omega*t[i]/d;
1992         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1993         PetscLogFlops(2*nz-1);
1994       }
1995     }
1996 
1997     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1998       for (i=0; i<m; i++)
1999         t[i] = b[i];
2000 
2001       for (i=0; i<m-1; i++){  /* update rhs */
2002         v  = aa + ai[i] + 1;
2003         vj = aj + ai[i] + 1;
2004         nz = ai[i+1] - ai[i] - 1;
2005         while (nz--) t[*vj++] -= x[i]*(*v++);
2006         PetscLogFlops(2*nz-1);
2007       }
2008       for (i=m-1; i>=0; i--){
2009         d  = *(aa + ai[i]);
2010         v  = aa + ai[i] + 1;
2011         vj = aj + ai[i] + 1;
2012         nz = ai[i+1] - ai[i] - 1;
2013         sum = t[i];
2014         while (nz--) sum -= x[*vj++]*(*v++);
2015         PetscLogFlops(2*nz-1);
2016         x[i] =   (1-omega)*x[i] + omega*sum/d;
2017       }
2018     }
2019     its--;
2020   }
2021 
2022   while (its--) {
2023     /*
2024        forward sweep:
2025        for i=0,...,m-1:
2026          sum[i] = (b[i] - U(i,:)x )/d[i];
2027          x[i]   = (1-omega)x[i] + omega*sum[i];
2028          b      = b - x[i]*U^T(i,:);
2029 
2030     */
2031     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2032       for (i=0; i<m; i++)
2033         t[i] = b[i];
2034 
2035       for (i=0; i<m; i++){
2036         d  = *(aa + ai[i]);  /* diag[i] */
2037         v  = aa + ai[i] + 1; v1=v;
2038         vj = aj + ai[i] + 1; vj1=vj;
2039         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2040         sum = t[i];
2041         while (nz1--) sum -= (*v1++)*x[*vj1++];
2042         x[i] = (1-omega)*x[i] + omega*sum/d;
2043         while (nz--) t[*vj++] -= x[i]*(*v++);
2044         PetscLogFlops(4*nz-2);
2045       }
2046     }
2047 
2048   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2049       /*
2050        backward sweep:
2051        b = b - x[i]*U^T(i,:), i=0,...,n-2
2052        for i=m-1,...,0:
2053          sum[i] = (b[i] - U(i,:)x )/d[i];
2054          x[i]   = (1-omega)x[i] + omega*sum[i];
2055       */
2056       for (i=0; i<m; i++)
2057         t[i] = b[i];
2058 
2059       for (i=0; i<m-1; i++){  /* update rhs */
2060         v  = aa + ai[i] + 1;
2061         vj = aj + ai[i] + 1;
2062         nz = ai[i+1] - ai[i] - 1;
2063         while (nz--) t[*vj++] -= x[i]*(*v++);
2064         PetscLogFlops(2*nz-1);
2065       }
2066       for (i=m-1; i>=0; i--){
2067         d  = *(aa + ai[i]);
2068         v  = aa + ai[i] + 1;
2069         vj = aj + ai[i] + 1;
2070         nz = ai[i+1] - ai[i] - 1;
2071         sum = t[i];
2072         while (nz--) sum -= x[*vj++]*(*v++);
2073         PetscLogFlops(2*nz-1);
2074         x[i] =   (1-omega)*x[i] + omega*sum/d;
2075       }
2076     }
2077   }
2078 
2079   ierr = PetscFree(t);CHKERRQ(ierr);
2080   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2081   if (bb != xx) {
2082     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2083   }
2084 
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 
2089 
2090 
2091 
2092 
2093