xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 28adb3f739167aae2a3fdf9bf501dbd0150de1de)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "../src/mat/impls/sbaij/seq/sbaij.h"
9 
10 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth);
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   *missing = PETSC_FALSE;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       *missing    = PETSC_TRUE;
31       if (dd) *dd = i;
32       break;
33     }
34   }
35   PetscFunctionReturn(0);
36 }
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
41 {
42   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45 
46   PetscFunctionBegin;
47   if (!a->diag) {
48     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
49   }
50   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
57 {
58   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
59   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs;
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   *nn = n;
64   if (!ia) PetscFunctionReturn(0);
65   if (!blockcompressed) {
66     /* malloc & create the natural set of indices */
67     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
68     for (i=0; i<n+1; i++) {
69       for (j=0; j<bs; j++) {
70         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
71       }
72     }
73     for (i=0; i<nz; i++) {
74       for (j=0; j<bs; j++) {
75         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
76       }
77     }
78   } else { /* blockcompressed */
79     if (oshift == 1) {
80       /* temporarily add 1 to i and j indices */
81       for (i=0; i<nz; i++) a->j[i]++;
82       for (i=0; i<n+1; i++) a->i[i]++;
83     }
84     *ia = a->i; *ja = a->j;
85   }
86 
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
93 {
94   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
95   PetscInt     i,n = a->mbs,nz = a->i[n];
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   if (!ia) PetscFunctionReturn(0);
100 
101   if (!blockcompressed) {
102     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
103   } else if (oshift == 1) { /* blockcompressed */
104     for (i=0; i<nz; i++) a->j[i]--;
105     for (i=0; i<n+1; i++) a->i[i]--;
106   }
107 
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
114 {
115   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119 #if defined(PETSC_USE_LOG)
120   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz);
121 #endif
122   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
123   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
124   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
125   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
126   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
127   if (a->inode.size) {ierr = PetscFree(a->inode.size);CHKERRQ(ierr);}
128   ierr = PetscFree(a->diag);CHKERRQ(ierr);
129   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
130   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
131   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
132   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
133   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
134   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
135   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
136   ierr = PetscFree(a->jshort);CHKERRQ(ierr);
137   ierr = PetscFree(a->inew);CHKERRQ(ierr);
138   ierr = PetscFree(a);CHKERRQ(ierr);
139 
140   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
141   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
142   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
143   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
144   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
145   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
152 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
153 {
154   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
155   PetscErrorCode ierr;
156 
157   PetscFunctionBegin;
158   switch (op) {
159   case MAT_ROW_ORIENTED:
160     a->roworiented = flg;
161     break;
162   case MAT_KEEP_NONZERO_PATTERN:
163     a->keepnonzeropattern = flg;
164     break;
165   case MAT_NEW_NONZERO_LOCATIONS:
166     a->nonew = (flg ? 0 : 1);
167     break;
168   case MAT_NEW_NONZERO_LOCATION_ERR:
169     a->nonew = (flg ? -1 : 0);
170     break;
171   case MAT_NEW_NONZERO_ALLOCATION_ERR:
172     a->nonew = (flg ? -2 : 0);
173     break;
174   case MAT_UNUSED_NONZERO_LOCATION_ERR:
175     a->nounused = (flg ? -1 : 0);
176     break;
177   case MAT_NEW_DIAGONALS:
178   case MAT_IGNORE_OFF_PROC_ENTRIES:
179   case MAT_USE_HASH_TABLE:
180     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
181     break;
182   case MAT_HERMITIAN:
183     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
184   case MAT_SYMMETRIC:
185   case MAT_STRUCTURALLY_SYMMETRIC:
186   case MAT_SYMMETRY_ETERNAL:
187     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
188     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
189     break;
190   case MAT_IGNORE_LOWER_TRIANGULAR:
191     a->ignore_ltriangular = flg;
192     break;
193   case MAT_ERROR_LOWER_TRIANGULAR:
194     a->ignore_ltriangular = flg;
195     break;
196   case MAT_GETROW_UPPERTRIANGULAR:
197     a->getrow_utriangular = flg;
198     break;
199   default:
200     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
208 {
209   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
210   PetscErrorCode ierr;
211   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
212   MatScalar      *aa,*aa_i;
213   PetscScalar    *v_i;
214 
215   PetscFunctionBegin;
216   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
217   /* Get the upper triangular part of the row */
218   bs  = A->rmap->bs;
219   ai  = a->i;
220   aj  = a->j;
221   aa  = a->a;
222   bs2 = a->bs2;
223 
224   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
225 
226   bn  = row/bs;   /* Block number */
227   bp  = row % bs; /* Block position */
228   M   = ai[bn+1] - ai[bn];
229   *ncols = bs*M;
230 
231   if (v) {
232     *v = 0;
233     if (*ncols) {
234       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
235       for (i=0; i<M; i++) { /* for each block in the block row */
236         v_i  = *v + i*bs;
237         aa_i = aa + bs2*(ai[bn] + i);
238         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
239       }
240     }
241   }
242 
243   if (cols) {
244     *cols = 0;
245     if (*ncols) {
246       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
247       for (i=0; i<M; i++) { /* for each block in the block row */
248         cols_i = *cols + i*bs;
249         itmp  = bs*aj[ai[bn] + i];
250         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
251       }
252     }
253   }
254 
255   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
256   /* this segment is currently removed, so only entries in the upper triangle are obtained */
257 #ifdef column_search
258   v_i    = *v    + M*bs;
259   cols_i = *cols + M*bs;
260   for (i=0; i<bn; i++){ /* for each block row */
261     M = ai[i+1] - ai[i];
262     for (j=0; j<M; j++){
263       itmp = aj[ai[i] + j];    /* block column value */
264       if (itmp == bn){
265         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
266         for (k=0; k<bs; k++) {
267           *cols_i++ = i*bs+k;
268           *v_i++    = aa_i[k];
269         }
270         *ncols += bs;
271         break;
272       }
273     }
274   }
275 #endif
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
281 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
282 {
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
287   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
293 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
294 {
295   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
296 
297   PetscFunctionBegin;
298   a->getrow_utriangular = PETSC_TRUE;
299   PetscFunctionReturn(0);
300 }
301 #undef __FUNCT__
302 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
303 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
304 {
305   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
306 
307   PetscFunctionBegin;
308   a->getrow_utriangular = PETSC_FALSE;
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
314 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B)
315 {
316   PetscErrorCode ierr;
317   PetscFunctionBegin;
318   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
319     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
326 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
327 {
328   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
329   PetscErrorCode    ierr;
330   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
331   const char        *name;
332   PetscViewerFormat format;
333 
334   PetscFunctionBegin;
335   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
336   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
337   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
338     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
339   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
340     Mat aij;
341 
342     if (A->factor && bs>1){
343       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
344       PetscFunctionReturn(0);
345     }
346     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
347     ierr = MatView(aij,viewer);CHKERRQ(ierr);
348     ierr = MatDestroy(aij);CHKERRQ(ierr);
349   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
350     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
351     for (i=0; i<a->mbs; i++) {
352       for (j=0; j<bs; j++) {
353         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
354         for (k=a->i[i]; k<a->i[i+1]; k++) {
355           for (l=0; l<bs; l++) {
356 #if defined(PETSC_USE_COMPLEX)
357             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
358               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
359                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
360             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
361               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
362                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
363             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
364               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
365             }
366 #else
367             if (a->a[bs2*k + l*bs + j] != 0.0) {
368               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
369             }
370 #endif
371           }
372         }
373         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
374       }
375     }
376     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
377   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
378      PetscFunctionReturn(0);
379   } else {
380     if (A->factor && bs>1){
381       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
382     }
383     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
384     for (i=0; i<a->mbs; i++) {
385       for (j=0; j<bs; j++) {
386         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
387         for (k=a->i[i]; k<a->i[i+1]; k++) {
388           for (l=0; l<bs; l++) {
389 #if defined(PETSC_USE_COMPLEX)
390             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
391               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
392                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
393             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
394               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
395                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
396             } else {
397               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
398             }
399 #else
400             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
401 #endif
402           }
403         }
404         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
405       }
406     }
407     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
408   }
409   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
415 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
416 {
417   Mat            A = (Mat) Aa;
418   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
419   PetscErrorCode ierr;
420   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
421   PetscMPIInt    rank;
422   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
423   MatScalar      *aa;
424   MPI_Comm       comm;
425   PetscViewer    viewer;
426 
427   PetscFunctionBegin;
428   /*
429     This is nasty. If this is called from an originally parallel matrix
430     then all processes call this,but only the first has the matrix so the
431     rest should return immediately.
432   */
433   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
434   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
435   if (rank) PetscFunctionReturn(0);
436 
437   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
438 
439   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
440   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
441 
442   /* loop over matrix elements drawing boxes */
443   color = PETSC_DRAW_BLUE;
444   for (i=0,row=0; i<mbs; i++,row+=bs) {
445     for (j=a->i[i]; j<a->i[i+1]; j++) {
446       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
447       x_l = a->j[j]*bs; x_r = x_l + 1.0;
448       aa = a->a + j*bs2;
449       for (k=0; k<bs; k++) {
450         for (l=0; l<bs; l++) {
451           if (PetscRealPart(*aa++) >=  0.) continue;
452           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
453         }
454       }
455     }
456   }
457   color = PETSC_DRAW_CYAN;
458   for (i=0,row=0; i<mbs; i++,row+=bs) {
459     for (j=a->i[i]; j<a->i[i+1]; j++) {
460       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
461       x_l = a->j[j]*bs; x_r = x_l + 1.0;
462       aa = a->a + j*bs2;
463       for (k=0; k<bs; k++) {
464         for (l=0; l<bs; l++) {
465           if (PetscRealPart(*aa++) != 0.) continue;
466           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
467         }
468       }
469     }
470   }
471 
472   color = PETSC_DRAW_RED;
473   for (i=0,row=0; i<mbs; i++,row+=bs) {
474     for (j=a->i[i]; j<a->i[i+1]; j++) {
475       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
476       x_l = a->j[j]*bs; x_r = x_l + 1.0;
477       aa = a->a + j*bs2;
478       for (k=0; k<bs; k++) {
479         for (l=0; l<bs; l++) {
480           if (PetscRealPart(*aa++) <= 0.) continue;
481           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
482         }
483       }
484     }
485   }
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
491 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
492 {
493   PetscErrorCode ierr;
494   PetscReal      xl,yl,xr,yr,w,h;
495   PetscDraw      draw;
496   PetscTruth     isnull;
497 
498   PetscFunctionBegin;
499   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
500   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
501 
502   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
503   xr  = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
504   xr += w;    yr += h;  xl = -w;     yl = -h;
505   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
506   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
507   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatView_SeqSBAIJ"
513 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
514 {
515   PetscErrorCode ierr;
516   PetscTruth     iascii,isdraw;
517 
518   PetscFunctionBegin;
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
520   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
521   if (iascii){
522     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
523   } else if (isdraw) {
524     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
525   } else {
526     Mat B;
527     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
528     ierr = MatView(B,viewer);CHKERRQ(ierr);
529     ierr = MatDestroy(B);CHKERRQ(ierr);
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
538 {
539   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
540   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
541   PetscInt     *ai = a->i,*ailen = a->ilen;
542   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
543   MatScalar    *ap,*aa = a->a;
544 
545   PetscFunctionBegin;
546   for (k=0; k<m; k++) { /* loop over rows */
547     row  = im[k]; brow = row/bs;
548     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
549     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
550     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
551     nrow = ailen[brow];
552     for (l=0; l<n; l++) { /* loop over columns */
553       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
554       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
555       col  = in[l] ;
556       bcol = col/bs;
557       cidx = col%bs;
558       ridx = row%bs;
559       high = nrow;
560       low  = 0; /* assume unsorted */
561       while (high-low > 5) {
562         t = (low+high)/2;
563         if (rp[t] > bcol) high = t;
564         else             low  = t;
565       }
566       for (i=low; i<high; i++) {
567         if (rp[i] > bcol) break;
568         if (rp[i] == bcol) {
569           *v++ = ap[bs2*i+bs*cidx+ridx];
570           goto finished;
571         }
572       }
573       *v++ = 0.0;
574       finished:;
575     }
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
584 {
585   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
586   PetscErrorCode    ierr;
587   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
588   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
589   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
590   PetscTruth        roworiented=a->roworiented;
591   const PetscScalar *value = v;
592   MatScalar         *ap,*aa = a->a,*bap;
593 
594   PetscFunctionBegin;
595   if (roworiented) {
596     stepval = (n-1)*bs;
597   } else {
598     stepval = (m-1)*bs;
599   }
600   for (k=0; k<m; k++) { /* loop over added rows */
601     row  = im[k];
602     if (row < 0) continue;
603 #if defined(PETSC_USE_DEBUG)
604     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
605 #endif
606     rp   = aj + ai[row];
607     ap   = aa + bs2*ai[row];
608     rmax = imax[row];
609     nrow = ailen[row];
610     low  = 0;
611     high = nrow;
612     for (l=0; l<n; l++) { /* loop over added columns */
613       if (in[l] < 0) continue;
614       col = in[l];
615 #if defined(PETSC_USE_DEBUG)
616       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
617 #endif
618       if (col < row) continue; /* ignore lower triangular block */
619       if (roworiented) {
620         value = v + k*(stepval+bs)*bs + l*bs;
621       } else {
622         value = v + l*(stepval+bs)*bs + k*bs;
623       }
624       if (col <= lastcol) low = 0; else high = nrow;
625       lastcol = col;
626       while (high-low > 7) {
627         t = (low+high)/2;
628         if (rp[t] > col) high = t;
629         else             low  = t;
630       }
631       for (i=low; i<high; i++) {
632         if (rp[i] > col) break;
633         if (rp[i] == col) {
634           bap  = ap +  bs2*i;
635           if (roworiented) {
636             if (is == ADD_VALUES) {
637               for (ii=0; ii<bs; ii++,value+=stepval) {
638                 for (jj=ii; jj<bs2; jj+=bs) {
639                   bap[jj] += *value++;
640                 }
641               }
642             } else {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] = *value++;
646                 }
647                }
648             }
649           } else {
650             if (is == ADD_VALUES) {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=0; jj<bs; jj++) {
653                   *bap++ += *value++;
654                 }
655               }
656             } else {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++  = *value++;
660                 }
661               }
662             }
663           }
664           goto noinsert2;
665         }
666       }
667       if (nonew == 1) goto noinsert2;
668       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
669       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
670       N = nrow++ - 1; high++;
671       /* shift up all the later entries in this row */
672       for (ii=N; ii>=i; ii--) {
673         rp[ii+1] = rp[ii];
674         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
675       }
676       if (N >= i) {
677         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
678       }
679       rp[i] = col;
680       bap   = ap +  bs2*i;
681       if (roworiented) {
682         for (ii=0; ii<bs; ii++,value+=stepval) {
683           for (jj=ii; jj<bs2; jj+=bs) {
684             bap[jj] = *value++;
685           }
686         }
687       } else {
688         for (ii=0; ii<bs; ii++,value+=stepval) {
689           for (jj=0; jj<bs; jj++) {
690             *bap++  = *value++;
691           }
692         }
693        }
694     noinsert2:;
695       low = i;
696     }
697     ailen[row] = nrow;
698   }
699    PetscFunctionReturn(0);
700 }
701 
702 /*
703     This is not yet used
704 */
705 #undef __FUNCT__
706 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ_Inode"
707 PetscErrorCode MatAssemblyEnd_SeqSBAIJ_Inode(Mat A)
708 {
709   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
710   PetscErrorCode  ierr;
711   const PetscInt  *ai = a->i, *aj = a->j,*cols;
712   PetscInt        i = 0,j,blk_size,m = A->rmap->n,node_count = 0,nzx,nzy,*ns,row,nz,cnt,cnt2,*counts;
713   PetscTruth      flag;
714 
715   PetscFunctionBegin;
716   ierr = PetscMalloc(m*sizeof(PetscInt),&ns);CHKERRQ(ierr);
717   while (i < m){
718     nzx = ai[i+1] - ai[i];       /* Number of nonzeros */
719     /* Limits the number of elements in a node to 'a->inode.limit' */
720     for (j=i+1,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) {
721       nzy  = ai[j+1] - ai[j];
722       if (nzy != (nzx - j + i)) break;
723       ierr = PetscMemcmp(aj + ai[i] + j - i,aj + ai[j],nzy*sizeof(PetscInt),&flag);CHKERRQ(ierr);
724       if (!flag) break;
725     }
726     ns[node_count++] = blk_size;
727     i = j;
728   }
729   if (!a->inode.size && m && node_count > .9*m) {
730     ierr = PetscFree(ns);CHKERRQ(ierr);
731     ierr = PetscInfo2(A,"Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);CHKERRQ(ierr);
732   } else {
733     a->inode.node_count = node_count;
734     ierr = PetscMalloc(node_count*sizeof(PetscInt),&a->inode.size);CHKERRQ(ierr);
735     ierr = PetscMemcpy(a->inode.size,ns,node_count*sizeof(PetscInt));
736     ierr = PetscFree(ns);CHKERRQ(ierr);
737     ierr = PetscInfo3(A,"Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);CHKERRQ(ierr);
738 
739     /* count collections of adjacent columns in each inode */
740     row = 0;
741     cnt = 0;
742     for (i=0; i<node_count; i++) {
743       cols = aj + ai[row] + a->inode.size[i];
744       nz   = ai[row+1] - ai[row] - a->inode.size[i];
745       for (j=1; j<nz; j++) {
746         if (cols[j] != cols[j-1]+1) {
747           cnt++;
748         }
749       }
750       cnt++;
751       row += a->inode.size[i];
752     }
753     ierr = PetscMalloc(2*cnt*sizeof(PetscInt),&counts);CHKERRQ(ierr);
754     cnt = 0;
755     row = 0;
756     for (i=0; i<node_count; i++) {
757       cols          = aj + ai[row] + a->inode.size[i];
758 	  CHKMEMQ;
759       counts[2*cnt] = cols[0];
760 	  CHKMEMQ;
761       nz            = ai[row+1] - ai[row] - a->inode.size[i];
762       cnt2          = 1;
763       for (j=1; j<nz; j++) {
764         if (cols[j] != cols[j-1]+1) {
765 	  CHKMEMQ;
766           counts[2*(cnt++)+1] = cnt2;
767           counts[2*cnt]       = cols[j];
768 	  CHKMEMQ;
769           cnt2                = 1;
770         } else cnt2++;
771       }
772 	  CHKMEMQ;
773       counts[2*(cnt++)+1] = cnt2;
774 	  CHKMEMQ;
775       row += a->inode.size[i];
776     }
777     ierr = PetscIntView(2*cnt,counts,0);
778   }
779 
780 
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatRelax_SeqSBAIJ_1_ushort"
786 PetscErrorCode MatRelax_SeqSBAIJ_1_ushort(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
787 {
788   Mat_SeqSBAIJ         *a = (Mat_SeqSBAIJ*)A->data;
789   const MatScalar      *aa=a->a,*v,*v1,*aidiag;
790   PetscScalar          *x,*b,*t,sum;
791   MatScalar            tmp;
792   PetscErrorCode       ierr;
793   PetscInt             m=a->mbs,bs=A->rmap->bs,j;
794   const PetscInt       *ai=a->i;
795   const unsigned short *aj=a->jshort,*vj,*vj1;
796   PetscInt             nz,nz1,i;
797 
798   PetscFunctionBegin;
799   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
800 
801   its = its*lits;
802   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
803 
804   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
805 
806   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
807   if (xx != bb) {
808     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
809   } else {
810     b = x;
811   }
812 
813   if (!a->idiagvalid) {
814     if (!a->idiag) {
815       ierr     = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
816     }
817     for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]];
818     a->idiagvalid = PETSC_TRUE;
819   }
820 
821   if (!a->relax_work) {
822     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
823   }
824   t = a->relax_work;
825 
826   aidiag = a->idiag;
827   if (flag & SOR_ZERO_INITIAL_GUESS) {
828     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
829       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
830 
831       v  = aa + 1;
832       vj = aj + 1;
833       for (i=0; i<m; i++){
834         nz = ai[i+1] - ai[i] - 1;
835         tmp = - (x[i] = omega*t[i]*aidiag[i]);
836         for (j=0; j<nz; j++) {
837           t[vj[j]] += tmp*v[j];
838         }
839         v  += nz + 1;
840         vj += nz + 1;
841       }
842       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
843     }
844 
845     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
846       int nz2;
847       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
848         t = b;
849       }
850 
851       v  = aa + ai[m-1] + 1;
852       vj = aj + ai[m-1] + 1;
853       nz = 0;
854       for (i=m-1; i>=0; i--){
855         sum = 0.0;
856         nz2 = ai[i] - ai[i-1] - 1;
857 	PETSC_Prefetch(v-nz2-1,0,1);
858 	PETSC_Prefetch(vj-nz2-1,0,1);
859         PetscSparseDensePlusDot(sum,x,v,vj,nz);
860         sum = t[i] - sum;
861         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
862         nz  = nz2;
863         v  -= nz + 1;
864         vj -= nz + 1;
865       }
866       t = a->relax_work;
867       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
868     }
869     its--;
870   }
871 
872   while (its--) {
873     /*
874        forward sweep:
875        for i=0,...,m-1:
876          sum[i] = (b[i] - U(i,:)x )/d[i];
877          x[i]   = (1-omega)x[i] + omega*sum[i];
878          b      = b - x[i]*U^T(i,:);
879 
880     */
881     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
882       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
883 
884       for (i=0; i<m; i++){
885         v  = aa + ai[i] + 1; v1=v;
886         vj = aj + ai[i] + 1; vj1=vj;
887         nz = ai[i+1] - ai[i] - 1; nz1=nz;
888         sum = t[i];
889         ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr);
890         while (nz1--) sum -= (*v1++)*x[*vj1++];
891         x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
892         while (nz--) t[*vj++] -= x[i]*(*v++);
893       }
894     }
895 
896     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
897       /*
898        backward sweep:
899        b = b - x[i]*U^T(i,:), i=0,...,n-2
900        for i=m-1,...,0:
901          sum[i] = (b[i] - U(i,:)x )/d[i];
902          x[i]   = (1-omega)x[i] + omega*sum[i];
903       */
904       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
905       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
906 
907       for (i=0; i<m-1; i++){  /* update rhs */
908         v  = aa + ai[i] + 1;
909         vj = aj + ai[i] + 1;
910         nz = ai[i+1] - ai[i] - 1;
911         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
912         while (nz--) t[*vj++] -= x[i]*(*v++);
913       }
914       for (i=m-1; i>=0; i--){
915         v  = aa + ai[i] + 1;
916         vj = aj + ai[i] + 1;
917         nz = ai[i+1] - ai[i] - 1;
918         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
919         sum = t[i];
920         while (nz--) sum -= x[*vj++]*(*v++);
921         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
922       }
923     }
924   }
925 
926   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
927   if (bb != xx) {
928     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMult_SeqSBAIJ_1_ushort"
935 PetscErrorCode MatMult_SeqSBAIJ_1_ushort(Mat A,Vec xx,Vec zz)
936 {
937   Mat_SeqSBAIJ         *a = (Mat_SeqSBAIJ*)A->data;
938   const PetscScalar    *x;
939   PetscScalar          *z,x1,sum;
940   const MatScalar      *v;
941   PetscErrorCode       ierr;
942   PetscInt             mbs=a->mbs,i,j,nz;
943   const unsigned short *ib=a->jshort;
944   const PetscInt       *ai=a->i;
945 
946   PetscFunctionBegin;
947   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
949   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
950 
951   v  = a->a;
952   for (i=0; i<mbs; i++) {
953     nz   = ai[i+1] - ai[i];        /* length of i_th row of A */
954     x1   = x[i];
955     sum  = v[0]*x1;                /* diagonal term */
956     for (j=1; j<nz; j++) {
957       z[ib[j]] += v[j] * x1;       /* (strict lower triangular part of A)*x  */
958       sum      += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x  */
959     }
960     z[i] += sum;
961     v    += nz;
962     ib   += nz;
963   }
964 
965   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
967   ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
973 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
974 {
975   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
976   PetscErrorCode ierr;
977   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
978   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
979   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
980   MatScalar      *aa = a->a,*ap;
981 
982   PetscFunctionBegin;
983   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
984 
985   if (m) rmax = ailen[0];
986   for (i=1; i<mbs; i++) {
987     /* move each row back by the amount of empty slots (fshift) before it*/
988     fshift += imax[i-1] - ailen[i-1];
989      rmax   = PetscMax(rmax,ailen[i]);
990      if (fshift) {
991        ip = aj + ai[i]; ap = aa + bs2*ai[i];
992        N = ailen[i];
993        for (j=0; j<N; j++) {
994          ip[j-fshift] = ip[j];
995          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
996        }
997      }
998      ai[i] = ai[i-1] + ailen[i-1];
999   }
1000   if (mbs) {
1001     fshift += imax[mbs-1] - ailen[mbs-1];
1002      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1003   }
1004   /* reset ilen and imax for each row */
1005   for (i=0; i<mbs; i++) {
1006     ailen[i] = imax[i] = ai[i+1] - ai[i];
1007   }
1008   a->nz = ai[mbs];
1009 
1010   /* diagonals may have moved, reset it */
1011   if (a->diag) {
1012     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1013   }
1014   if (fshift && a->nounused == -1) {
1015     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1016   }
1017   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1018   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1019   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1020   a->reallocs          = 0;
1021   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1022   a->idiagvalid = PETSC_FALSE;
1023 
1024   if (A->cmap->n < 65536 && A->cmap->bs == 1) {
1025     if (!a->jshort) {
1026       ierr = PetscMalloc(a->i[A->rmap->n]*sizeof(unsigned short),&a->jshort);CHKERRQ(ierr);
1027       for (i=0; i<a->i[A->rmap->n]; i++) a->jshort[i] = a->j[i];
1028       A->ops->mult  = MatMult_SeqSBAIJ_1_ushort;
1029       A->ops->relax = MatRelax_SeqSBAIJ_1_ushort;
1030     }
1031   }
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 /*
1036    This function returns an array of flags which indicate the locations of contiguous
1037    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1038    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1039    Assume: sizes should be long enough to hold all the values.
1040 */
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
1043 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1044 {
1045   PetscInt   i,j,k,row;
1046   PetscTruth flg;
1047 
1048   PetscFunctionBegin;
1049    for (i=0,j=0; i<n; j++) {
1050      row = idx[i];
1051      if (row%bs!=0) { /* Not the begining of a block */
1052        sizes[j] = 1;
1053        i++;
1054      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
1055        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1056        i++;
1057      } else { /* Begining of the block, so check if the complete block exists */
1058        flg = PETSC_TRUE;
1059        for (k=1; k<bs; k++) {
1060          if (row+k != idx[i+k]) { /* break in the block */
1061            flg = PETSC_FALSE;
1062            break;
1063          }
1064        }
1065        if (flg) { /* No break in the bs */
1066          sizes[j] = bs;
1067          i+= bs;
1068        } else {
1069          sizes[j] = 1;
1070          i++;
1071        }
1072      }
1073    }
1074    *bs_max = j;
1075    PetscFunctionReturn(0);
1076 }
1077 
1078 
1079 /* Only add/insert a(i,j) with i<=j (blocks).
1080    Any a(i,j) with i>j input by user is ingored.
1081 */
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
1085 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1086 {
1087   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1090   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
1091   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
1092   PetscInt       ridx,cidx,bs2=a->bs2;
1093   MatScalar      *ap,value,*aa=a->a,*bap;
1094 
1095   PetscFunctionBegin;
1096   for (k=0; k<m; k++) { /* loop over added rows */
1097     row  = im[k];       /* row number */
1098     brow = row/bs;      /* block row number */
1099     if (row < 0) continue;
1100 #if defined(PETSC_USE_DEBUG)
1101     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
1102 #endif
1103     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
1104     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
1105     rmax = imax[brow];  /* maximum space allocated for this row */
1106     nrow = ailen[brow]; /* actual length of this row */
1107     low  = 0;
1108 
1109     for (l=0; l<n; l++) { /* loop over added columns */
1110       if (in[l] < 0) continue;
1111 #if defined(PETSC_USE_DEBUG)
1112       if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1);
1113 #endif
1114       col = in[l];
1115       bcol = col/bs;              /* block col number */
1116 
1117       if (brow > bcol) {
1118         if (a->ignore_ltriangular){
1119           continue; /* ignore lower triangular values */
1120         } else {
1121           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
1122         }
1123       }
1124 
1125       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
1126       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
1127         /* element value a(k,l) */
1128         if (roworiented) {
1129           value = v[l + k*n];
1130         } else {
1131           value = v[k + l*m];
1132         }
1133 
1134         /* move pointer bap to a(k,l) quickly and add/insert value */
1135         if (col <= lastcol) low = 0; high = nrow;
1136         lastcol = col;
1137         while (high-low > 7) {
1138           t = (low+high)/2;
1139           if (rp[t] > bcol) high = t;
1140           else              low  = t;
1141         }
1142         for (i=low; i<high; i++) {
1143           if (rp[i] > bcol) break;
1144           if (rp[i] == bcol) {
1145             bap  = ap +  bs2*i + bs*cidx + ridx;
1146             if (is == ADD_VALUES) *bap += value;
1147             else                  *bap  = value;
1148             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
1149             if (brow == bcol && ridx < cidx){
1150               bap  = ap +  bs2*i + bs*ridx + cidx;
1151               if (is == ADD_VALUES) *bap += value;
1152               else                  *bap  = value;
1153             }
1154             goto noinsert1;
1155           }
1156         }
1157 
1158         if (nonew == 1) goto noinsert1;
1159         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1160         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1161 
1162         N = nrow++ - 1; high++;
1163         /* shift up all the later entries in this row */
1164         for (ii=N; ii>=i; ii--) {
1165           rp[ii+1] = rp[ii];
1166           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1167         }
1168         if (N>=i) {
1169           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1170         }
1171         rp[i]                      = bcol;
1172         ap[bs2*i + bs*cidx + ridx] = value;
1173       noinsert1:;
1174         low = i;
1175       }
1176     }   /* end of loop over added columns */
1177     ailen[brow] = nrow;
1178   }   /* end of loop over added rows */
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1184 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info)
1185 {
1186   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1187   Mat            outA;
1188   PetscErrorCode ierr;
1189   PetscTruth     row_identity;
1190 
1191   PetscFunctionBegin;
1192   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
1193   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1194   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
1195   if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
1196 
1197   outA        = inA;
1198   inA->factor = MAT_FACTOR_ICC;
1199 
1200   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1201   ierr = MatSeqSBAIJSetNumericFactorization(inA,row_identity);CHKERRQ(ierr);
1202 
1203   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1204   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1205   a->row = row;
1206   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1207   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1208   a->col = row;
1209 
1210   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
1211   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
1212   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1213 
1214   if (!a->solve_work) {
1215     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1216     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1217   }
1218 
1219   ierr = MatCholeskyFactorNumeric(outA,inA,info);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 EXTERN_C_BEGIN
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1226 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1227 {
1228   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1229   PetscInt     i,nz,n;
1230 
1231   PetscFunctionBegin;
1232   nz = baij->maxnz;
1233   n  = mat->cmap->n;
1234   for (i=0; i<nz; i++) {
1235     baij->j[i] = indices[i];
1236   }
1237    baij->nz = nz;
1238    for (i=0; i<n; i++) {
1239      baij->ilen[i] = baij->imax[i];
1240    }
1241    PetscFunctionReturn(0);
1242 }
1243 EXTERN_C_END
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1247 /*@
1248   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1249   in the matrix.
1250 
1251   Input Parameters:
1252   +  mat     - the SeqSBAIJ matrix
1253   -  indices - the column indices
1254 
1255   Level: advanced
1256 
1257   Notes:
1258   This can be called if you have precomputed the nonzero structure of the
1259   matrix and want to provide it to the matrix object to improve the performance
1260   of the MatSetValues() operation.
1261 
1262   You MUST have set the correct numbers of nonzeros per row in the call to
1263   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1264 
1265   MUST be called before any calls to MatSetValues()
1266 
1267   .seealso: MatCreateSeqSBAIJ
1268 @*/
1269 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1270 {
1271   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1272 
1273   PetscFunctionBegin;
1274   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1275   PetscValidPointer(indices,2);
1276   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1277   if (f) {
1278     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1279   } else {
1280     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1287 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1288 {
1289   PetscErrorCode ierr;
1290 
1291   PetscFunctionBegin;
1292   /* If the two matrices have the same copy implementation, use fast copy. */
1293   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1294     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1295     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1296 
1297     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
1298       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1299     }
1300     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
1301   } else {
1302     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1303     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1304     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1311 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1312 {
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1322 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1323 {
1324   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1325   PetscFunctionBegin;
1326   *array = a->a;
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1332 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1333 {
1334   PetscFunctionBegin;
1335   PetscFunctionReturn(0);
1336  }
1337 
1338 #include "petscblaslapack.h"
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1341 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1342 {
1343   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1344   PetscErrorCode ierr;
1345   PetscInt       i,bs=Y->rmap->bs,bs2,j;
1346   PetscBLASInt   one = 1,bnz = PetscBLASIntCast(x->nz);
1347 
1348   PetscFunctionBegin;
1349   if (str == SAME_NONZERO_PATTERN) {
1350     PetscScalar alpha = a;
1351     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1352   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1353     if (y->xtoy && y->XtoY != X) {
1354       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1355       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1356     }
1357     if (!y->xtoy) { /* get xtoy */
1358       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1359       y->XtoY = X;
1360     }
1361     bs2 = bs*bs;
1362     for (i=0; i<x->nz; i++) {
1363       j = 0;
1364       while (j < bs2){
1365         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1366         j++;
1367       }
1368     }
1369     ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1370   } else {
1371     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1372     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1373     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1380 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1381 {
1382   PetscFunctionBegin;
1383   *flg = PETSC_TRUE;
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1389 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1390 {
1391    PetscFunctionBegin;
1392    *flg = PETSC_TRUE;
1393    PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1398 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1399  {
1400    PetscFunctionBegin;
1401    *flg = PETSC_FALSE;
1402    PetscFunctionReturn(0);
1403  }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1407 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1408 {
1409   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1410   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1411   MatScalar      *aa = a->a;
1412 
1413   PetscFunctionBegin;
1414   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1420 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1421 {
1422   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1423   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1424   MatScalar      *aa = a->a;
1425 
1426   PetscFunctionBegin;
1427   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 /* -------------------------------------------------------------------*/
1432 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1433        MatGetRow_SeqSBAIJ,
1434        MatRestoreRow_SeqSBAIJ,
1435        MatMult_SeqSBAIJ_N,
1436 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1437        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1438        MatMultAdd_SeqSBAIJ_N,
1439        0,
1440        0,
1441        0,
1442 /*10*/ 0,
1443        0,
1444        MatCholeskyFactor_SeqSBAIJ,
1445        MatRelax_SeqSBAIJ,
1446        MatTranspose_SeqSBAIJ,
1447 /*15*/ MatGetInfo_SeqSBAIJ,
1448        MatEqual_SeqSBAIJ,
1449        MatGetDiagonal_SeqSBAIJ,
1450        MatDiagonalScale_SeqSBAIJ,
1451        MatNorm_SeqSBAIJ,
1452 /*20*/ 0,
1453        MatAssemblyEnd_SeqSBAIJ,
1454        MatSetOption_SeqSBAIJ,
1455        MatZeroEntries_SeqSBAIJ,
1456 /*24*/ 0,
1457        0,
1458        0,
1459        0,
1460        0,
1461 /*29*/ MatSetUpPreallocation_SeqSBAIJ,
1462        0,
1463        0,
1464        MatGetArray_SeqSBAIJ,
1465        MatRestoreArray_SeqSBAIJ,
1466 /*34*/ MatDuplicate_SeqSBAIJ,
1467        0,
1468        0,
1469        0,
1470        MatICCFactor_SeqSBAIJ,
1471 /*39*/ MatAXPY_SeqSBAIJ,
1472        MatGetSubMatrices_SeqSBAIJ,
1473        MatIncreaseOverlap_SeqSBAIJ,
1474        MatGetValues_SeqSBAIJ,
1475        MatCopy_SeqSBAIJ,
1476 /*44*/ 0,
1477        MatScale_SeqSBAIJ,
1478        0,
1479        0,
1480        0,
1481 /*49*/ 0,
1482        MatGetRowIJ_SeqSBAIJ,
1483        MatRestoreRowIJ_SeqSBAIJ,
1484        0,
1485        0,
1486 /*54*/ 0,
1487        0,
1488        0,
1489        0,
1490        MatSetValuesBlocked_SeqSBAIJ,
1491 /*59*/ MatGetSubMatrix_SeqSBAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*64*/ 0,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*69*/ MatGetRowMaxAbs_SeqSBAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*74*/ 0,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*79*/ 0,
1512        0,
1513        0,
1514 #if !defined(PETSC_USE_COMPLEX)
1515        MatGetInertia_SeqSBAIJ,
1516 #else
1517        0,
1518 #endif
1519        MatLoad_SeqSBAIJ,
1520 /*84*/ MatIsSymmetric_SeqSBAIJ,
1521        MatIsHermitian_SeqSBAIJ,
1522        MatIsStructurallySymmetric_SeqSBAIJ,
1523        0,
1524        0,
1525 /*89*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*94*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*99*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*104*/0,
1541        MatRealPart_SeqSBAIJ,
1542        MatImaginaryPart_SeqSBAIJ,
1543        MatGetRowUpperTriangular_SeqSBAIJ,
1544        MatRestoreRowUpperTriangular_SeqSBAIJ,
1545 /*109*/0,
1546        0,
1547        0,
1548        0,
1549        MatMissingDiagonal_SeqSBAIJ
1550 };
1551 
1552 EXTERN_C_BEGIN
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1555 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1556 {
1557   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1558   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   if (aij->nonew != 1) {
1563     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1564   }
1565 
1566   /* allocate space for values if not already there */
1567   if (!aij->saved_values) {
1568     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1569   }
1570 
1571   /* copy values over */
1572   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1580 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1581 {
1582   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1583   PetscErrorCode ierr;
1584   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1585 
1586   PetscFunctionBegin;
1587   if (aij->nonew != 1) {
1588     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1589   }
1590   if (!aij->saved_values) {
1591     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1592   }
1593 
1594   /* copy values over */
1595   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 EXTERN_C_END
1599 
1600 EXTERN_C_BEGIN
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1603 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1604 {
1605   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1606   PetscErrorCode ierr;
1607   PetscInt       i,mbs,bs2, newbs = PetscAbs(bs);
1608   PetscTruth     skipallocation = PETSC_FALSE,flg = PETSC_FALSE;
1609 
1610   PetscFunctionBegin;
1611   B->preallocated = PETSC_TRUE;
1612   if (bs < 0) {
1613     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1614       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1615     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1616     bs   = PetscAbs(bs);
1617   }
1618   if (nnz && newbs != bs) {
1619     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
1620   }
1621   bs = newbs;
1622 
1623   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1624   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1625   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1626   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1627 
1628   mbs  = B->rmap->N/bs;
1629   bs2  = bs*bs;
1630 
1631   if (mbs*bs != B->rmap->N) {
1632     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1633   }
1634 
1635   if (nz == MAT_SKIP_ALLOCATION) {
1636     skipallocation = PETSC_TRUE;
1637     nz             = 0;
1638   }
1639 
1640   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1641   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1642   if (nnz) {
1643     for (i=0; i<mbs; i++) {
1644       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1645       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1646     }
1647   }
1648 
1649   B->ops->mult             = MatMult_SeqSBAIJ_N;
1650   B->ops->multadd          = MatMultAdd_SeqSBAIJ_N;
1651   B->ops->multtranspose    = MatMult_SeqSBAIJ_N;
1652   B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N;
1653   ierr  = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1654   if (!flg) {
1655     switch (bs) {
1656     case 1:
1657       B->ops->mult             = MatMult_SeqSBAIJ_1;
1658       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1659       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1660       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1661       break;
1662     case 2:
1663       B->ops->mult             = MatMult_SeqSBAIJ_2;
1664       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1665       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1666       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1667       break;
1668     case 3:
1669       B->ops->mult             = MatMult_SeqSBAIJ_3;
1670       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1671       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1672       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1673       break;
1674     case 4:
1675       B->ops->mult             = MatMult_SeqSBAIJ_4;
1676       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1677       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1678       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1679       break;
1680     case 5:
1681       B->ops->mult             = MatMult_SeqSBAIJ_5;
1682       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1683       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1684       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1685       break;
1686     case 6:
1687       B->ops->mult             = MatMult_SeqSBAIJ_6;
1688       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1689       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1690       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1691       break;
1692     case 7:
1693       B->ops->mult             = MatMult_SeqSBAIJ_7;
1694       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1695       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1696       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1697       break;
1698     }
1699   }
1700 
1701   b->mbs = mbs;
1702   b->nbs = mbs;
1703   if (!skipallocation) {
1704     if (!b->imax) {
1705       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1706       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
1707     }
1708     if (!nnz) {
1709       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1710       else if (nz <= 0)        nz = 1;
1711       for (i=0; i<mbs; i++) {
1712         b->imax[i] = nz;
1713       }
1714       nz = nz*mbs; /* total nz */
1715     } else {
1716       nz = 0;
1717       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1718     }
1719     /* b->ilen will count nonzeros in each block row so far. */
1720     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1721     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1722 
1723     /* allocate the matrix space */
1724     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
1725     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
1726     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1727     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1728     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1729     b->singlemalloc = PETSC_TRUE;
1730 
1731     /* pointer to beginning of each row */
1732     b->i[0] = 0;
1733     for (i=1; i<mbs+1; i++) {
1734       b->i[i] = b->i[i-1] + b->imax[i-1];
1735     }
1736     b->free_a     = PETSC_TRUE;
1737     b->free_ij    = PETSC_TRUE;
1738   } else {
1739     b->free_a     = PETSC_FALSE;
1740     b->free_ij    = PETSC_FALSE;
1741   }
1742 
1743   B->rmap->bs               = bs;
1744   b->bs2              = bs2;
1745   b->nz             = 0;
1746   b->maxnz          = nz*bs2;
1747 
1748   b->inew             = 0;
1749   b->jnew             = 0;
1750   b->anew             = 0;
1751   b->a2anew           = 0;
1752   b->permute          = PETSC_FALSE;
1753   PetscFunctionReturn(0);
1754 }
1755 EXTERN_C_END
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
1759 /*
1760    This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization
1761 */
1762 PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural)
1763 {
1764   PetscErrorCode ierr;
1765   PetscTruth     flg = PETSC_FALSE;
1766   PetscInt       bs = B->rmap->bs;
1767 
1768   PetscFunctionBegin;
1769   ierr    = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1770   if (flg) bs = 8;
1771 
1772   if (!natural) {
1773     switch (bs) {
1774     case 1:
1775       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1776       break;
1777     case 2:
1778       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1779       break;
1780     case 3:
1781       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1782       break;
1783     case 4:
1784       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1785       break;
1786     case 5:
1787       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1788       break;
1789     case 6:
1790       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1791       break;
1792     case 7:
1793       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1794       break;
1795     default:
1796       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N;
1797       break;
1798     }
1799   } else {
1800     switch (bs) {
1801     case 1:
1802       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1803       break;
1804     case 2:
1805       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1806       break;
1807     case 3:
1808       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1809       break;
1810     case 4:
1811       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1812       break;
1813     case 5:
1814       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1815       break;
1816     case 6:
1817       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1818       break;
1819     case 7:
1820       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1821       break;
1822     default:
1823       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1824       break;
1825     }
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 EXTERN_C_BEGIN
1831 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1832 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1833 EXTERN_C_END
1834 
1835 
1836 EXTERN_C_BEGIN
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc"
1839 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
1840 {
1841   PetscInt           n = A->rmap->n;
1842   PetscErrorCode     ierr;
1843 
1844   PetscFunctionBegin;
1845   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
1846   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
1847   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1848     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
1849     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1850     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ;
1851     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqSBAIJ;
1852   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
1853   (*B)->factor = ftype;
1854   PetscFunctionReturn(0);
1855 }
1856 EXTERN_C_END
1857 
1858 EXTERN_C_BEGIN
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetFactorAvailable_seqsbaij_petsc"
1861 PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
1862 {
1863   PetscFunctionBegin;
1864   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1865     *flg = PETSC_TRUE;
1866   } else {
1867     *flg = PETSC_FALSE;
1868   }
1869   PetscFunctionReturn(0);
1870 }
1871 EXTERN_C_END
1872 
1873 EXTERN_C_BEGIN
1874 #if defined(PETSC_HAVE_MUMPS)
1875 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*);
1876 #endif
1877 #if defined(PETSC_HAVE_SPOOLES)
1878 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*);
1879 #endif
1880 #if defined(PETSC_HAVE_PASTIX)
1881 extern PetscErrorCode MatGetFactor_seqsbaij_pastix(Mat,MatFactorType,Mat*);
1882 #endif
1883 EXTERN_C_END
1884 
1885 /*MC
1886   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1887   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1888 
1889   Options Database Keys:
1890   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1891 
1892   Level: beginner
1893 
1894   .seealso: MatCreateSeqSBAIJ
1895 M*/
1896 
1897 EXTERN_C_BEGIN
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1900 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1901 {
1902   Mat_SeqSBAIJ   *b;
1903   PetscErrorCode ierr;
1904   PetscMPIInt    size;
1905   PetscTruth     no_unroll = PETSC_FALSE,no_inode = PETSC_FALSE;
1906 
1907   PetscFunctionBegin;
1908   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
1909   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1910 
1911   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1912   B->data = (void*)b;
1913   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1914   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1915   B->ops->view        = MatView_SeqSBAIJ;
1916   B->mapping          = 0;
1917   b->row              = 0;
1918   b->icol             = 0;
1919   b->reallocs         = 0;
1920   b->saved_values     = 0;
1921   b->inode.limit      = 5;
1922   b->inode.max_limit  = 5;
1923 
1924   b->roworiented      = PETSC_TRUE;
1925   b->nonew            = 0;
1926   b->diag             = 0;
1927   b->solve_work       = 0;
1928   b->mult_work        = 0;
1929   B->spptr            = 0;
1930   b->keepnonzeropattern   = PETSC_FALSE;
1931   b->xtoy             = 0;
1932   b->XtoY             = 0;
1933 
1934   b->inew             = 0;
1935   b->jnew             = 0;
1936   b->anew             = 0;
1937   b->a2anew           = 0;
1938   b->permute          = PETSC_FALSE;
1939 
1940   b->ignore_ltriangular = PETSC_FALSE;
1941   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&b->ignore_ltriangular,PETSC_NULL);CHKERRQ(ierr);
1942 
1943   b->getrow_utriangular = PETSC_FALSE;
1944   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&b->getrow_utriangular,PETSC_NULL);CHKERRQ(ierr);
1945 
1946 #if defined(PETSC_HAVE_PASTIX)
1947   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C",
1948 					   "MatGetFactor_seqsbaij_pastix",
1949 					   MatGetFactor_seqsbaij_pastix);CHKERRQ(ierr);
1950 #endif
1951 #if defined(PETSC_HAVE_SPOOLES)
1952   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C",
1953                                      "MatGetFactor_seqsbaij_spooles",
1954                                      MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr);
1955 #endif
1956 #if defined(PETSC_HAVE_MUMPS)
1957   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C",
1958                                      "MatGetFactor_seqsbaij_mumps",
1959                                      MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr);
1960 #endif
1961   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C",
1962                                      "MatGetFactorAvailable_seqsbaij_petsc",
1963                                      MatGetFactorAvailable_seqsbaij_petsc);CHKERRQ(ierr);
1964   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C",
1965                                      "MatGetFactor_seqsbaij_petsc",
1966                                      MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr);
1967   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1968                                      "MatStoreValues_SeqSBAIJ",
1969                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1970   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1971                                      "MatRetrieveValues_SeqSBAIJ",
1972                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1973   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1974                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1975                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1976   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1977                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1978                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1979   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1980                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1981                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1982   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1983                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1984                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1985 
1986   B->symmetric                  = PETSC_TRUE;
1987   B->structurally_symmetric     = PETSC_TRUE;
1988   B->symmetric_set              = PETSC_TRUE;
1989   B->structurally_symmetric_set = PETSC_TRUE;
1990   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1991 
1992   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for SEQSBAIJ matrix","Mat");CHKERRQ(ierr);
1993     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for inodes (slower)",PETSC_NULL,no_unroll,&no_unroll,PETSC_NULL);CHKERRQ(ierr);
1994     if (no_unroll) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_unroll\n");CHKERRQ(ierr);}
1995     ierr = PetscOptionsTruth("-mat_no_inode","Do not optimize for inodes (slower)",PETSC_NULL,no_inode,&no_inode,PETSC_NULL);CHKERRQ(ierr);
1996     if (no_inode) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_inode\n");CHKERRQ(ierr);}
1997     ierr = PetscOptionsInt("-mat_inode_limit","Do not use inodes larger then this value",PETSC_NULL,b->inode.limit,&b->inode.limit,PETSC_NULL);CHKERRQ(ierr);
1998   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1999   b->inode.use = (PetscTruth)(!(no_unroll || no_inode));
2000   if (b->inode.limit > b->inode.max_limit) b->inode.limit = b->inode.max_limit;
2001 
2002   PetscFunctionReturn(0);
2003 }
2004 EXTERN_C_END
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
2008 /*@C
2009    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
2010    compressed row) format.  For good matrix assembly performance the
2011    user should preallocate the matrix storage by setting the parameter nz
2012    (or the array nnz).  By setting these parameters accurately, performance
2013    during matrix assembly can be increased by more than a factor of 50.
2014 
2015    Collective on Mat
2016 
2017    Input Parameters:
2018 +  A - the symmetric matrix
2019 .  bs - size of block
2020 .  nz - number of block nonzeros per block row (same for all rows)
2021 -  nnz - array containing the number of block nonzeros in the upper triangular plus
2022          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
2023 
2024    Options Database Keys:
2025 .   -mat_no_unroll - uses code that does not unroll the loops in the
2026                      block calculations (much slower)
2027 .    -mat_block_size - size of the blocks to use (only works if a negative bs is passed in
2028 
2029    Level: intermediate
2030 
2031    Notes:
2032    Specify the preallocated storage with either nz or nnz (not both).
2033    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2034    allocation.  For additional details, see the users manual chapter on
2035    matrices.
2036 
2037    You can call MatGetInfo() to get information on how effective the preallocation was;
2038    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2039    You can also run with the option -info and look for messages with the string
2040    malloc in them to see if additional memory allocation was needed.
2041 
2042    If the nnz parameter is given then the nz parameter is ignored
2043 
2044 
2045 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2046 @*/
2047 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2048 {
2049   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2050 
2051   PetscFunctionBegin;
2052   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2053   if (f) {
2054     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2055   }
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatCreateSeqSBAIJ"
2061 /*@C
2062    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
2063    compressed row) format.  For good matrix assembly performance the
2064    user should preallocate the matrix storage by setting the parameter nz
2065    (or the array nnz).  By setting these parameters accurately, performance
2066    during matrix assembly can be increased by more than a factor of 50.
2067 
2068    Collective on MPI_Comm
2069 
2070    Input Parameters:
2071 +  comm - MPI communicator, set to PETSC_COMM_SELF
2072 .  bs - size of block
2073 .  m - number of rows, or number of columns
2074 .  nz - number of block nonzeros per block row (same for all rows)
2075 -  nnz - array containing the number of block nonzeros in the upper triangular plus
2076          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
2077 
2078    Output Parameter:
2079 .  A - the symmetric matrix
2080 
2081    Options Database Keys:
2082 .   -mat_no_unroll - uses code that does not unroll the loops in the
2083                      block calculations (much slower)
2084 .    -mat_block_size - size of the blocks to use
2085 
2086    Level: intermediate
2087 
2088    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2089    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2090    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2091 
2092    Notes:
2093    The number of rows and columns must be divisible by blocksize.
2094    This matrix type does not support complex Hermitian operation.
2095 
2096    Specify the preallocated storage with either nz or nnz (not both).
2097    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2098    allocation.  For additional details, see the users manual chapter on
2099    matrices.
2100 
2101    If the nnz parameter is given then the nz parameter is ignored
2102 
2103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2104 @*/
2105 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2106 {
2107   PetscErrorCode ierr;
2108 
2109   PetscFunctionBegin;
2110   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2111   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2112   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2113   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
2119 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2120 {
2121   Mat            C;
2122   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
2123   PetscErrorCode ierr;
2124   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2125 
2126   PetscFunctionBegin;
2127   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2128 
2129   *B = 0;
2130   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2131   ierr = MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2132   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2133   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2134   c    = (Mat_SeqSBAIJ*)C->data;
2135 
2136   C->preallocated       = PETSC_TRUE;
2137   C->factor             = A->factor;
2138   c->row                = 0;
2139   c->icol               = 0;
2140   c->saved_values       = 0;
2141   c->keepnonzeropattern = a->keepnonzeropattern;
2142   C->assembled          = PETSC_TRUE;
2143 
2144   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2145   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2146   c->bs2  = a->bs2;
2147   c->mbs  = a->mbs;
2148   c->nbs  = a->nbs;
2149 
2150   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
2151   for (i=0; i<mbs; i++) {
2152     c->imax[i] = a->imax[i];
2153     c->ilen[i] = a->ilen[i];
2154   }
2155 
2156   /* allocate the matrix space */
2157   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2158   c->singlemalloc = PETSC_TRUE;
2159   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2160   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
2161   if (mbs > 0) {
2162     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2163     if (cpvalues == MAT_COPY_VALUES) {
2164       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2165     } else {
2166       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2167     }
2168     if (a->jshort) {
2169       ierr = PetscMalloc(nz*sizeof(unsigned short),&c->jshort);CHKERRQ(ierr);
2170       ierr = PetscMemcpy(c->jshort,a->jshort,nz*sizeof(unsigned short));CHKERRQ(ierr);
2171     }
2172   }
2173 
2174   c->roworiented = a->roworiented;
2175   c->nonew       = a->nonew;
2176 
2177   if (a->diag) {
2178     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2179     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2180     for (i=0; i<mbs; i++) {
2181       c->diag[i] = a->diag[i];
2182     }
2183   } else c->diag  = 0;
2184   c->nz           = a->nz;
2185   c->maxnz        = a->maxnz;
2186   c->solve_work   = 0;
2187   c->mult_work    = 0;
2188   c->free_a       = PETSC_TRUE;
2189   c->free_ij      = PETSC_TRUE;
2190   *B = C;
2191   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatLoad_SeqSBAIJ"
2197 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2198 {
2199   Mat_SeqSBAIJ   *a;
2200   Mat            B;
2201   PetscErrorCode ierr;
2202   int            fd;
2203   PetscMPIInt    size;
2204   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2205   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
2206   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2207   PetscInt       *masked,nmask,tmp,bs2,ishift;
2208   PetscScalar    *aa;
2209   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2210 
2211   PetscFunctionBegin;
2212   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2213   bs2  = bs*bs;
2214 
2215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2216   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2217   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2218   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2219   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2220   M = header[1]; N = header[2]; nz = header[3];
2221 
2222   if (header[3] < 0) {
2223     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
2224   }
2225 
2226   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2227 
2228   /*
2229      This code adds extra rows to make sure the number of rows is
2230     divisible by the blocksize
2231   */
2232   mbs        = M/bs;
2233   extra_rows = bs - M + bs*(mbs);
2234   if (extra_rows == bs) extra_rows = 0;
2235   else                  mbs++;
2236   if (extra_rows) {
2237     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2238   }
2239 
2240   /* read in row lengths */
2241   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2242   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2243   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2244 
2245   /* read in column indices */
2246   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2247   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2248   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2249 
2250   /* loop over row lengths determining block row lengths */
2251   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
2252   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2253   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2254   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2255   masked   = mask + mbs;
2256   rowcount = 0; nzcount = 0;
2257   for (i=0; i<mbs; i++) {
2258     nmask = 0;
2259     for (j=0; j<bs; j++) {
2260       kmax = rowlengths[rowcount];
2261       for (k=0; k<kmax; k++) {
2262         tmp = jj[nzcount++]/bs;   /* block col. index */
2263         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
2264       }
2265       rowcount++;
2266     }
2267     s_browlengths[i] += nmask;
2268 
2269     /* zero out the mask elements we set */
2270     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2271   }
2272 
2273   /* create our matrix */
2274   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
2275   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2276   ierr = MatSetType(B,type);CHKERRQ(ierr);
2277   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
2278   a = (Mat_SeqSBAIJ*)B->data;
2279 
2280   /* set matrix "i" values */
2281   a->i[0] = 0;
2282   for (i=1; i<= mbs; i++) {
2283     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
2284     a->ilen[i-1] = s_browlengths[i-1];
2285   }
2286   a->nz = a->i[mbs];
2287 
2288   /* read in nonzero values */
2289   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2290   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2291   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2292 
2293   /* set "a" and "j" values into matrix */
2294   nzcount = 0; jcount = 0;
2295   for (i=0; i<mbs; i++) {
2296     nzcountb = nzcount;
2297     nmask    = 0;
2298     for (j=0; j<bs; j++) {
2299       kmax = rowlengths[i*bs+j];
2300       for (k=0; k<kmax; k++) {
2301         tmp = jj[nzcount++]/bs; /* block col. index */
2302         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
2303       }
2304     }
2305     /* sort the masked values */
2306     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2307 
2308     /* set "j" values into matrix */
2309     maskcount = 1;
2310     for (j=0; j<nmask; j++) {
2311       a->j[jcount++]  = masked[j];
2312       mask[masked[j]] = maskcount++;
2313     }
2314 
2315     /* set "a" values into matrix */
2316     ishift = bs2*a->i[i];
2317     for (j=0; j<bs; j++) {
2318       kmax = rowlengths[i*bs+j];
2319       for (k=0; k<kmax; k++) {
2320         tmp       = jj[nzcountb]/bs ; /* block col. index */
2321         if (tmp >= i){
2322           block     = mask[tmp] - 1;
2323           point     = jj[nzcountb] - bs*tmp;
2324           idx       = ishift + bs2*block + j + bs*point;
2325           a->a[idx] = aa[nzcountb];
2326         }
2327         nzcountb++;
2328       }
2329     }
2330     /* zero out the mask elements we set */
2331     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2332   }
2333   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2334 
2335   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2336   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
2337   ierr = PetscFree(aa);CHKERRQ(ierr);
2338   ierr = PetscFree(jj);CHKERRQ(ierr);
2339   ierr = PetscFree(mask);CHKERRQ(ierr);
2340 
2341   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatView_Private(B);CHKERRQ(ierr);
2344   *A = B;
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2350 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2351 {
2352   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
2353   const MatScalar *aa=a->a,*v,*v1,*aidiag;
2354   PetscScalar     *x,*b,*t,sum;
2355   MatScalar       tmp;
2356   PetscErrorCode  ierr;
2357   PetscInt        m=a->mbs,bs=A->rmap->bs,j;
2358   const PetscInt  *ai=a->i,*aj=a->j,*vj,*vj1;
2359   PetscInt        nz,nz1,i;
2360 
2361   PetscFunctionBegin;
2362   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
2363 
2364   its = its*lits;
2365   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2366 
2367   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2368 
2369   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2370   if (xx != bb) {
2371     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2372   } else {
2373     b = x;
2374   }
2375 
2376   if (!a->idiagvalid) {
2377     if (!a->idiag) {
2378       ierr     = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
2379     }
2380     for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]];
2381     a->idiagvalid = PETSC_TRUE;
2382   }
2383 
2384   if (!a->relax_work) {
2385     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
2386   }
2387   t = a->relax_work;
2388 
2389   aidiag = a->idiag;
2390   if (flag & SOR_ZERO_INITIAL_GUESS) {
2391     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2392       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2393 
2394       v  = aa + 1;
2395       vj = aj + 1;
2396       for (i=0; i<m; i++){
2397         nz = ai[i+1] - ai[i] - 1;
2398         tmp = - (x[i] = omega*t[i]*aidiag[i]);
2399         for (j=0; j<nz; j++) {
2400           t[vj[j]] += tmp*v[j];
2401         }
2402         v  += nz + 1;
2403         vj += nz + 1;
2404       }
2405       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
2406     }
2407 
2408     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2409       int nz2;
2410       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2411         t = b;
2412       }
2413 
2414       v  = aa + ai[m-1] + 1;
2415       vj = aj + ai[m-1] + 1;
2416       nz = 0;
2417       for (i=m-1; i>=0; i--){
2418         sum = 0.0;
2419         nz2 = ai[i] - ai[i-1] - 1;
2420 	PETSC_Prefetch(v-nz2-1,0,1);
2421 	PETSC_Prefetch(vj-nz2-1,0,1);
2422         PetscSparseDensePlusDot(sum,x,v,vj,nz);
2423         sum = t[i] - sum;
2424         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
2425         nz  = nz2;
2426         v  -= nz + 1;
2427         vj -= nz + 1;
2428       }
2429       t = a->relax_work;
2430       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
2431     }
2432     its--;
2433   }
2434 
2435   while (its--) {
2436     /*
2437        forward sweep:
2438        for i=0,...,m-1:
2439          sum[i] = (b[i] - U(i,:)x )/d[i];
2440          x[i]   = (1-omega)x[i] + omega*sum[i];
2441          b      = b - x[i]*U^T(i,:);
2442 
2443     */
2444     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2445       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2446 
2447       for (i=0; i<m; i++){
2448         v  = aa + ai[i] + 1; v1=v;
2449         vj = aj + ai[i] + 1; vj1=vj;
2450         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2451         sum = t[i];
2452         ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr);
2453         while (nz1--) sum -= (*v1++)*x[*vj1++];
2454         x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
2455         while (nz--) t[*vj++] -= x[i]*(*v++);
2456       }
2457     }
2458 
2459     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2460       /*
2461        backward sweep:
2462        b = b - x[i]*U^T(i,:), i=0,...,n-2
2463        for i=m-1,...,0:
2464          sum[i] = (b[i] - U(i,:)x )/d[i];
2465          x[i]   = (1-omega)x[i] + omega*sum[i];
2466       */
2467       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2468       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2469 
2470       for (i=0; i<m-1; i++){  /* update rhs */
2471         v  = aa + ai[i] + 1;
2472         vj = aj + ai[i] + 1;
2473         nz = ai[i+1] - ai[i] - 1;
2474         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
2475         while (nz--) t[*vj++] -= x[i]*(*v++);
2476       }
2477       for (i=m-1; i>=0; i--){
2478         v  = aa + ai[i] + 1;
2479         vj = aj + ai[i] + 1;
2480         nz = ai[i+1] - ai[i] - 1;
2481         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
2482         sum = t[i];
2483         while (nz--) sum -= x[*vj++]*(*v++);
2484         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
2485       }
2486     }
2487   }
2488 
2489   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2490   if (bb != xx) {
2491     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2492   }
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2498 /*@
2499      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2500               (upper triangular entries in CSR format) provided by the user.
2501 
2502      Collective on MPI_Comm
2503 
2504    Input Parameters:
2505 +  comm - must be an MPI communicator of size 1
2506 .  bs - size of block
2507 .  m - number of rows
2508 .  n - number of columns
2509 .  i - row indices
2510 .  j - column indices
2511 -  a - matrix values
2512 
2513    Output Parameter:
2514 .  mat - the matrix
2515 
2516    Level: intermediate
2517 
2518    Notes:
2519        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2520     once the matrix is destroyed
2521 
2522        You cannot set new nonzero locations into this matrix, that will generate an error.
2523 
2524        The i and j indices are 0 based
2525 
2526 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2527 
2528 @*/
2529 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2530 {
2531   PetscErrorCode ierr;
2532   PetscInt       ii;
2533   Mat_SeqSBAIJ   *sbaij;
2534 
2535   PetscFunctionBegin;
2536   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2537   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2538 
2539   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2540   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2541   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2542   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2543   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2544   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2545 
2546   sbaij->i = i;
2547   sbaij->j = j;
2548   sbaij->a = a;
2549   sbaij->singlemalloc = PETSC_FALSE;
2550   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2551   sbaij->free_a       = PETSC_FALSE;
2552   sbaij->free_ij      = PETSC_FALSE;
2553 
2554   for (ii=0; ii<m; ii++) {
2555     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2556 #if defined(PETSC_USE_DEBUG)
2557     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2558 #endif
2559   }
2560 #if defined(PETSC_USE_DEBUG)
2561   for (ii=0; ii<sbaij->i[m]; ii++) {
2562     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2563     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2564   }
2565 #endif
2566 
2567   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2568   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 
2573 
2574 
2575 
2576