xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision d543d7c6650afe38e02d4e42c97e77acf4b0e00a)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "../src/mat/impls/sbaij/seq/sbaij.h"
9 
10 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth);
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   *missing = PETSC_FALSE;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       *missing    = PETSC_TRUE;
31       if (dd) *dd = i;
32       break;
33     }
34   }
35   PetscFunctionReturn(0);
36 }
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
41 {
42   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45 
46   PetscFunctionBegin;
47   if (!a->diag) {
48     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
49   }
50   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
57 {
58   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
59   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs;
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   *nn = n;
64   if (!ia) PetscFunctionReturn(0);
65   if (!blockcompressed) {
66     /* malloc & create the natural set of indices */
67     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
68     for (i=0; i<n+1; i++) {
69       for (j=0; j<bs; j++) {
70         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
71       }
72     }
73     for (i=0; i<nz; i++) {
74       for (j=0; j<bs; j++) {
75         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
76       }
77     }
78   } else { /* blockcompressed */
79     if (oshift == 1) {
80       /* temporarily add 1 to i and j indices */
81       for (i=0; i<nz; i++) a->j[i]++;
82       for (i=0; i<n+1; i++) a->i[i]++;
83     }
84     *ia = a->i; *ja = a->j;
85   }
86 
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
93 {
94   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
95   PetscInt     i,n = a->mbs,nz = a->i[n];
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   if (!ia) PetscFunctionReturn(0);
100 
101   if (!blockcompressed) {
102     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
103   } else if (oshift == 1) { /* blockcompressed */
104     for (i=0; i<nz; i++) a->j[i]--;
105     for (i=0; i<n+1; i++) a->i[i]--;
106   }
107 
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
114 {
115   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119 #if defined(PETSC_USE_LOG)
120   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz);
121 #endif
122   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
123   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
124   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
125   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
126   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
127   if (a->inode.size) {ierr = PetscFree(a->inode.size);CHKERRQ(ierr);}
128   ierr = PetscFree(a->diag);CHKERRQ(ierr);
129   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
130   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
131   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
132   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
133   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
134   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
135   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
136   ierr = PetscFree(a->jshort);CHKERRQ(ierr);
137   ierr = PetscFree(a->inew);CHKERRQ(ierr);
138   ierr = PetscFree(a);CHKERRQ(ierr);
139 
140   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
141   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
142   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
143   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
144   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
145   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
152 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
153 {
154   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
155   PetscErrorCode ierr;
156 
157   PetscFunctionBegin;
158   switch (op) {
159   case MAT_ROW_ORIENTED:
160     a->roworiented = flg;
161     break;
162   case MAT_KEEP_NONZERO_PATTERN:
163     a->keepnonzeropattern = flg;
164     break;
165   case MAT_NEW_NONZERO_LOCATIONS:
166     a->nonew = (flg ? 0 : 1);
167     break;
168   case MAT_NEW_NONZERO_LOCATION_ERR:
169     a->nonew = (flg ? -1 : 0);
170     break;
171   case MAT_NEW_NONZERO_ALLOCATION_ERR:
172     a->nonew = (flg ? -2 : 0);
173     break;
174   case MAT_UNUSED_NONZERO_LOCATION_ERR:
175     a->nounused = (flg ? -1 : 0);
176     break;
177   case MAT_NEW_DIAGONALS:
178   case MAT_IGNORE_OFF_PROC_ENTRIES:
179   case MAT_USE_HASH_TABLE:
180     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
181     break;
182   case MAT_HERMITIAN:
183     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
184   case MAT_SYMMETRIC:
185   case MAT_STRUCTURALLY_SYMMETRIC:
186   case MAT_SYMMETRY_ETERNAL:
187     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
188     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
189     break;
190   case MAT_IGNORE_LOWER_TRIANGULAR:
191     a->ignore_ltriangular = flg;
192     break;
193   case MAT_ERROR_LOWER_TRIANGULAR:
194     a->ignore_ltriangular = flg;
195     break;
196   case MAT_GETROW_UPPERTRIANGULAR:
197     a->getrow_utriangular = flg;
198     break;
199   default:
200     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
208 {
209   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
210   PetscErrorCode ierr;
211   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
212   MatScalar      *aa,*aa_i;
213   PetscScalar    *v_i;
214 
215   PetscFunctionBegin;
216   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
217   /* Get the upper triangular part of the row */
218   bs  = A->rmap->bs;
219   ai  = a->i;
220   aj  = a->j;
221   aa  = a->a;
222   bs2 = a->bs2;
223 
224   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
225 
226   bn  = row/bs;   /* Block number */
227   bp  = row % bs; /* Block position */
228   M   = ai[bn+1] - ai[bn];
229   *ncols = bs*M;
230 
231   if (v) {
232     *v = 0;
233     if (*ncols) {
234       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
235       for (i=0; i<M; i++) { /* for each block in the block row */
236         v_i  = *v + i*bs;
237         aa_i = aa + bs2*(ai[bn] + i);
238         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
239       }
240     }
241   }
242 
243   if (cols) {
244     *cols = 0;
245     if (*ncols) {
246       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
247       for (i=0; i<M; i++) { /* for each block in the block row */
248         cols_i = *cols + i*bs;
249         itmp  = bs*aj[ai[bn] + i];
250         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
251       }
252     }
253   }
254 
255   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
256   /* this segment is currently removed, so only entries in the upper triangle are obtained */
257 #ifdef column_search
258   v_i    = *v    + M*bs;
259   cols_i = *cols + M*bs;
260   for (i=0; i<bn; i++){ /* for each block row */
261     M = ai[i+1] - ai[i];
262     for (j=0; j<M; j++){
263       itmp = aj[ai[i] + j];    /* block column value */
264       if (itmp == bn){
265         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
266         for (k=0; k<bs; k++) {
267           *cols_i++ = i*bs+k;
268           *v_i++    = aa_i[k];
269         }
270         *ncols += bs;
271         break;
272       }
273     }
274   }
275 #endif
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
281 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
282 {
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
287   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
293 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
294 {
295   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
296 
297   PetscFunctionBegin;
298   a->getrow_utriangular = PETSC_TRUE;
299   PetscFunctionReturn(0);
300 }
301 #undef __FUNCT__
302 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
303 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
304 {
305   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
306 
307   PetscFunctionBegin;
308   a->getrow_utriangular = PETSC_FALSE;
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
314 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B)
315 {
316   PetscErrorCode ierr;
317   PetscFunctionBegin;
318   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
319     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
326 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
327 {
328   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
329   PetscErrorCode    ierr;
330   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
331   const char        *name;
332   PetscViewerFormat format;
333 
334   PetscFunctionBegin;
335   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
336   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
337   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
338     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
339   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
340     Mat aij;
341 
342     if (A->factor && bs>1){
343       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
344       PetscFunctionReturn(0);
345     }
346     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
347     ierr = MatView(aij,viewer);CHKERRQ(ierr);
348     ierr = MatDestroy(aij);CHKERRQ(ierr);
349   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
350     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
351     for (i=0; i<a->mbs; i++) {
352       for (j=0; j<bs; j++) {
353         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
354         for (k=a->i[i]; k<a->i[i+1]; k++) {
355           for (l=0; l<bs; l++) {
356 #if defined(PETSC_USE_COMPLEX)
357             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
358               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
359                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
360             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
361               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
362                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
363             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
364               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
365             }
366 #else
367             if (a->a[bs2*k + l*bs + j] != 0.0) {
368               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
369             }
370 #endif
371           }
372         }
373         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
374       }
375     }
376     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
377   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
378      PetscFunctionReturn(0);
379   } else {
380     if (A->factor && bs>1){
381       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
382     }
383     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
384     for (i=0; i<a->mbs; i++) {
385       for (j=0; j<bs; j++) {
386         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
387         for (k=a->i[i]; k<a->i[i+1]; k++) {
388           for (l=0; l<bs; l++) {
389 #if defined(PETSC_USE_COMPLEX)
390             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
391               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
392                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
393             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
394               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
395                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
396             } else {
397               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
398             }
399 #else
400             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
401 #endif
402           }
403         }
404         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
405       }
406     }
407     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
408   }
409   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
415 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
416 {
417   Mat            A = (Mat) Aa;
418   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
419   PetscErrorCode ierr;
420   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
421   PetscMPIInt    rank;
422   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
423   MatScalar      *aa;
424   MPI_Comm       comm;
425   PetscViewer    viewer;
426 
427   PetscFunctionBegin;
428   /*
429     This is nasty. If this is called from an originally parallel matrix
430     then all processes call this,but only the first has the matrix so the
431     rest should return immediately.
432   */
433   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
434   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
435   if (rank) PetscFunctionReturn(0);
436 
437   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
438 
439   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
440   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
441 
442   /* loop over matrix elements drawing boxes */
443   color = PETSC_DRAW_BLUE;
444   for (i=0,row=0; i<mbs; i++,row+=bs) {
445     for (j=a->i[i]; j<a->i[i+1]; j++) {
446       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
447       x_l = a->j[j]*bs; x_r = x_l + 1.0;
448       aa = a->a + j*bs2;
449       for (k=0; k<bs; k++) {
450         for (l=0; l<bs; l++) {
451           if (PetscRealPart(*aa++) >=  0.) continue;
452           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
453         }
454       }
455     }
456   }
457   color = PETSC_DRAW_CYAN;
458   for (i=0,row=0; i<mbs; i++,row+=bs) {
459     for (j=a->i[i]; j<a->i[i+1]; j++) {
460       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
461       x_l = a->j[j]*bs; x_r = x_l + 1.0;
462       aa = a->a + j*bs2;
463       for (k=0; k<bs; k++) {
464         for (l=0; l<bs; l++) {
465           if (PetscRealPart(*aa++) != 0.) continue;
466           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
467         }
468       }
469     }
470   }
471 
472   color = PETSC_DRAW_RED;
473   for (i=0,row=0; i<mbs; i++,row+=bs) {
474     for (j=a->i[i]; j<a->i[i+1]; j++) {
475       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
476       x_l = a->j[j]*bs; x_r = x_l + 1.0;
477       aa = a->a + j*bs2;
478       for (k=0; k<bs; k++) {
479         for (l=0; l<bs; l++) {
480           if (PetscRealPart(*aa++) <= 0.) continue;
481           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
482         }
483       }
484     }
485   }
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
491 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
492 {
493   PetscErrorCode ierr;
494   PetscReal      xl,yl,xr,yr,w,h;
495   PetscDraw      draw;
496   PetscTruth     isnull;
497 
498   PetscFunctionBegin;
499   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
500   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
501 
502   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
503   xr  = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
504   xr += w;    yr += h;  xl = -w;     yl = -h;
505   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
506   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
507   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatView_SeqSBAIJ"
513 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
514 {
515   PetscErrorCode ierr;
516   PetscTruth     iascii,isdraw;
517 
518   PetscFunctionBegin;
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
520   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
521   if (iascii){
522     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
523   } else if (isdraw) {
524     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
525   } else {
526     Mat B;
527     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
528     ierr = MatView(B,viewer);CHKERRQ(ierr);
529     ierr = MatDestroy(B);CHKERRQ(ierr);
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
538 {
539   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
540   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
541   PetscInt     *ai = a->i,*ailen = a->ilen;
542   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
543   MatScalar    *ap,*aa = a->a;
544 
545   PetscFunctionBegin;
546   for (k=0; k<m; k++) { /* loop over rows */
547     row  = im[k]; brow = row/bs;
548     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
549     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
550     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
551     nrow = ailen[brow];
552     for (l=0; l<n; l++) { /* loop over columns */
553       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
554       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
555       col  = in[l] ;
556       bcol = col/bs;
557       cidx = col%bs;
558       ridx = row%bs;
559       high = nrow;
560       low  = 0; /* assume unsorted */
561       while (high-low > 5) {
562         t = (low+high)/2;
563         if (rp[t] > bcol) high = t;
564         else             low  = t;
565       }
566       for (i=low; i<high; i++) {
567         if (rp[i] > bcol) break;
568         if (rp[i] == bcol) {
569           *v++ = ap[bs2*i+bs*cidx+ridx];
570           goto finished;
571         }
572       }
573       *v++ = 0.0;
574       finished:;
575     }
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
584 {
585   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
586   PetscErrorCode    ierr;
587   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
588   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
589   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
590   PetscTruth        roworiented=a->roworiented;
591   const PetscScalar *value = v;
592   MatScalar         *ap,*aa = a->a,*bap;
593 
594   PetscFunctionBegin;
595   if (roworiented) {
596     stepval = (n-1)*bs;
597   } else {
598     stepval = (m-1)*bs;
599   }
600   for (k=0; k<m; k++) { /* loop over added rows */
601     row  = im[k];
602     if (row < 0) continue;
603 #if defined(PETSC_USE_DEBUG)
604     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
605 #endif
606     rp   = aj + ai[row];
607     ap   = aa + bs2*ai[row];
608     rmax = imax[row];
609     nrow = ailen[row];
610     low  = 0;
611     high = nrow;
612     for (l=0; l<n; l++) { /* loop over added columns */
613       if (in[l] < 0) continue;
614       col = in[l];
615 #if defined(PETSC_USE_DEBUG)
616       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
617 #endif
618       if (col < row) continue; /* ignore lower triangular block */
619       if (roworiented) {
620         value = v + k*(stepval+bs)*bs + l*bs;
621       } else {
622         value = v + l*(stepval+bs)*bs + k*bs;
623       }
624       if (col <= lastcol) low = 0; else high = nrow;
625       lastcol = col;
626       while (high-low > 7) {
627         t = (low+high)/2;
628         if (rp[t] > col) high = t;
629         else             low  = t;
630       }
631       for (i=low; i<high; i++) {
632         if (rp[i] > col) break;
633         if (rp[i] == col) {
634           bap  = ap +  bs2*i;
635           if (roworiented) {
636             if (is == ADD_VALUES) {
637               for (ii=0; ii<bs; ii++,value+=stepval) {
638                 for (jj=ii; jj<bs2; jj+=bs) {
639                   bap[jj] += *value++;
640                 }
641               }
642             } else {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] = *value++;
646                 }
647                }
648             }
649           } else {
650             if (is == ADD_VALUES) {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=0; jj<bs; jj++) {
653                   *bap++ += *value++;
654                 }
655               }
656             } else {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++  = *value++;
660                 }
661               }
662             }
663           }
664           goto noinsert2;
665         }
666       }
667       if (nonew == 1) goto noinsert2;
668       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
669       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
670       N = nrow++ - 1; high++;
671       /* shift up all the later entries in this row */
672       for (ii=N; ii>=i; ii--) {
673         rp[ii+1] = rp[ii];
674         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
675       }
676       if (N >= i) {
677         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
678       }
679       rp[i] = col;
680       bap   = ap +  bs2*i;
681       if (roworiented) {
682         for (ii=0; ii<bs; ii++,value+=stepval) {
683           for (jj=ii; jj<bs2; jj+=bs) {
684             bap[jj] = *value++;
685           }
686         }
687       } else {
688         for (ii=0; ii<bs; ii++,value+=stepval) {
689           for (jj=0; jj<bs; jj++) {
690             *bap++  = *value++;
691           }
692         }
693        }
694     noinsert2:;
695       low = i;
696     }
697     ailen[row] = nrow;
698   }
699    PetscFunctionReturn(0);
700 }
701 
702 /*
703     This is not yet used
704 */
705 #undef __FUNCT__
706 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ_Inode"
707 PetscErrorCode MatAssemblyEnd_SeqSBAIJ_Inode(Mat A)
708 {
709   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
710   PetscErrorCode  ierr;
711   const PetscInt  *ai = a->i, *aj = a->j,*cols;
712   PetscInt        i = 0,j,blk_size,m = A->rmap->n,node_count = 0,nzx,nzy,*ns,row,nz,cnt,cnt2,*counts;
713   PetscTruth      flag;
714 
715   PetscFunctionBegin;
716   ierr = PetscMalloc(m*sizeof(PetscInt),&ns);CHKERRQ(ierr);
717   while (i < m){
718     nzx = ai[i+1] - ai[i];       /* Number of nonzeros */
719     /* Limits the number of elements in a node to 'a->inode.limit' */
720     for (j=i+1,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) {
721       nzy  = ai[j+1] - ai[j];
722       if (nzy != (nzx - j + i)) break;
723       ierr = PetscMemcmp(aj + ai[i] + j - i,aj + ai[j],nzy*sizeof(PetscInt),&flag);CHKERRQ(ierr);
724       if (!flag) break;
725     }
726     ns[node_count++] = blk_size;
727     i = j;
728   }
729   if (!a->inode.size && m && node_count > .9*m) {
730     ierr = PetscFree(ns);CHKERRQ(ierr);
731     ierr = PetscInfo2(A,"Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);CHKERRQ(ierr);
732   } else {
733     a->inode.node_count = node_count;
734     ierr = PetscMalloc(node_count*sizeof(PetscInt),&a->inode.size);CHKERRQ(ierr);
735     ierr = PetscMemcpy(a->inode.size,ns,node_count*sizeof(PetscInt));
736     ierr = PetscFree(ns);CHKERRQ(ierr);
737     ierr = PetscInfo3(A,"Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);CHKERRQ(ierr);
738 
739     /* count collections of adjacent columns in each inode */
740     row = 0;
741     cnt = 0;
742     for (i=0; i<node_count; i++) {
743       cols = aj + ai[row] + a->inode.size[i];
744       nz   = ai[row+1] - ai[row] - a->inode.size[i];
745       for (j=1; j<nz; j++) {
746         if (cols[j] != cols[j-1]+1) {
747           cnt++;
748         }
749       }
750       cnt++;
751       row += a->inode.size[i];
752     }
753     ierr = PetscMalloc(2*cnt*sizeof(PetscInt),&counts);CHKERRQ(ierr);
754     cnt = 0;
755     row = 0;
756     for (i=0; i<node_count; i++) {
757       cols          = aj + ai[row] + a->inode.size[i];
758 	  CHKMEMQ;
759       counts[2*cnt] = cols[0];
760 	  CHKMEMQ;
761       nz            = ai[row+1] - ai[row] - a->inode.size[i];
762       cnt2          = 1;
763       for (j=1; j<nz; j++) {
764         if (cols[j] != cols[j-1]+1) {
765 	  CHKMEMQ;
766           counts[2*(cnt++)+1] = cnt2;
767           counts[2*cnt]       = cols[j];
768 	  CHKMEMQ;
769           cnt2                = 1;
770         } else cnt2++;
771       }
772 	  CHKMEMQ;
773       counts[2*(cnt++)+1] = cnt2;
774 	  CHKMEMQ;
775       row += a->inode.size[i];
776     }
777     ierr = PetscIntView(2*cnt,counts,0);
778   }
779 
780 
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatRelax_SeqSBAIJ_1_ushort"
786 PetscErrorCode MatRelax_SeqSBAIJ_1_ushort(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
787 {
788   Mat_SeqSBAIJ         *a = (Mat_SeqSBAIJ*)A->data;
789   const MatScalar      *aa=a->a,*v,*v1,*aidiag;
790   PetscScalar          *x,*b,*t,sum;
791   MatScalar            tmp;
792   PetscErrorCode       ierr;
793   PetscInt             m=a->mbs,bs=A->rmap->bs,j;
794   const PetscInt       *ai=a->i;
795   const unsigned short *aj=a->jshort,*vj,*vj1;
796   PetscInt              nz,nz1,i;
797 
798   PetscFunctionBegin;
799   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
800 
801   its = its*lits;
802   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
803 
804   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
805 
806   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
807   if (xx != bb) {
808     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
809   } else {
810     b = x;
811   }
812 
813   if (!a->idiagvalid) {
814     if (!a->idiag) {
815       ierr     = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
816     }
817     for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]];
818     a->idiagvalid = PETSC_TRUE;
819   }
820 
821   if (!a->relax_work) {
822     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
823   }
824   t = a->relax_work;
825 
826   aidiag = a->idiag;
827 
828   if (flag == SOR_APPLY_UPPER) {
829     /* apply (U + D/omega) to the vector */
830     PetscScalar d;
831     for (i=0; i<m; i++) {
832       d    = fshift + aa[ai[i]];
833       nz   = ai[i+1] - ai[i] - 1;
834       vj   = aj + ai[i] + 1;
835       v    = aa + ai[i] + 1;
836       sum  = b[i]*d/omega;
837       PetscSparseDensePlusDot(sum,b,v,vj,nz);
838       x[i] = sum;
839     }
840     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
841   }
842 
843   if (flag & SOR_ZERO_INITIAL_GUESS) {
844     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
845       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
846 
847       v  = aa + 1;
848       vj = aj + 1;
849       for (i=0; i<m; i++){
850         nz = ai[i+1] - ai[i] - 1;
851         tmp = - (x[i] = omega*t[i]*aidiag[i]);
852         for (j=0; j<nz; j++) {
853           t[vj[j]] += tmp*v[j];
854         }
855         v  += nz + 1;
856         vj += nz + 1;
857       }
858       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
859     }
860 
861     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
862       int nz2;
863       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
864         t = b;
865       }
866 
867       v  = aa + ai[m-1] + 1;
868       vj = aj + ai[m-1] + 1;
869       nz = 0;
870       for (i=m-1; i>=0; i--){
871         sum = 0.0;
872         nz2 = ai[i] - ai[i-1] - 1;
873 	PETSC_Prefetch(v-nz2-1,0,1);
874 	PETSC_Prefetch(vj-nz2-1,0,1);
875         PetscSparseDensePlusDot(sum,x,v,vj,nz);
876         sum = t[i] - sum;
877         if (t == b) {
878 	  x[i] = omega*sum*aidiag[i];
879         } else {
880 	  x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
881         }
882         nz  = nz2;
883         v  -= nz + 1;
884         vj -= nz + 1;
885       }
886       t = a->relax_work;
887       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
888     }
889     its--;
890   }
891 
892   while (its--) {
893     /*
894        forward sweep:
895        for i=0,...,m-1:
896          sum[i] = (b[i] - U(i,:)x )/d[i];
897          x[i]   = (1-omega)x[i] + omega*sum[i];
898          b      = b - x[i]*U^T(i,:);
899 
900     */
901     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
902       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
903 
904       for (i=0; i<m; i++){
905         v  = aa + ai[i] + 1; v1=v;
906         vj = aj + ai[i] + 1; vj1=vj;
907         nz = ai[i+1] - ai[i] - 1; nz1=nz;
908         sum = t[i];
909         ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr);
910         while (nz1--) sum -= (*v1++)*x[*vj1++];
911         x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
912         while (nz--) t[*vj++] -= x[i]*(*v++);
913       }
914     }
915 
916     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
917       /*
918        backward sweep:
919        b = b - x[i]*U^T(i,:), i=0,...,n-2
920        for i=m-1,...,0:
921          sum[i] = (b[i] - U(i,:)x )/d[i];
922          x[i]   = (1-omega)x[i] + omega*sum[i];
923       */
924       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
925       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
926 
927       for (i=0; i<m-1; i++){  /* update rhs */
928         v  = aa + ai[i] + 1;
929         vj = aj + ai[i] + 1;
930         nz = ai[i+1] - ai[i] - 1;
931         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
932         while (nz--) t[*vj++] -= x[i]*(*v++);
933       }
934       for (i=m-1; i>=0; i--){
935         v  = aa + ai[i] + 1;
936         vj = aj + ai[i] + 1;
937         nz = ai[i+1] - ai[i] - 1;
938         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
939         sum = t[i];
940         while (nz--) sum -= x[*vj++]*(*v++);
941         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
942       }
943     }
944   }
945 
946   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
947   if (bb != xx) {
948     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
949   }
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatMult_SeqSBAIJ_1_ushort"
955 PetscErrorCode MatMult_SeqSBAIJ_1_ushort(Mat A,Vec xx,Vec zz)
956 {
957   Mat_SeqSBAIJ         *a = (Mat_SeqSBAIJ*)A->data;
958   const PetscScalar    *x;
959   PetscScalar          *z,x1,sum;
960   const MatScalar      *v;
961   PetscErrorCode       ierr;
962   PetscInt             mbs=a->mbs,i,j,nz;
963   const unsigned short *ib=a->jshort;
964   const PetscInt       *ai=a->i;
965 
966   PetscFunctionBegin;
967   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
968   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
969   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
970 
971   v  = a->a;
972   for (i=0; i<mbs; i++) {
973     nz   = ai[i+1] - ai[i];        /* length of i_th row of A */
974     x1   = x[i];
975     sum  = v[0]*x1;                /* diagonal term */
976     for (j=1; j<nz; j++) {
977       z[ib[j]] += v[j] * x1;       /* (strict lower triangular part of A)*x  */
978       sum      += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x  */
979     }
980     z[i] += sum;
981     v    += nz;
982     ib   += nz;
983   }
984 
985   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
986   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
987   ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
993 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
994 {
995   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
996   PetscErrorCode ierr;
997   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
998   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
999   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1000   MatScalar      *aa = a->a,*ap;
1001 
1002   PetscFunctionBegin;
1003   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1004 
1005   if (m) rmax = ailen[0];
1006   for (i=1; i<mbs; i++) {
1007     /* move each row back by the amount of empty slots (fshift) before it*/
1008     fshift += imax[i-1] - ailen[i-1];
1009      rmax   = PetscMax(rmax,ailen[i]);
1010      if (fshift) {
1011        ip = aj + ai[i]; ap = aa + bs2*ai[i];
1012        N = ailen[i];
1013        for (j=0; j<N; j++) {
1014          ip[j-fshift] = ip[j];
1015          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1016        }
1017      }
1018      ai[i] = ai[i-1] + ailen[i-1];
1019   }
1020   if (mbs) {
1021     fshift += imax[mbs-1] - ailen[mbs-1];
1022      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1023   }
1024   /* reset ilen and imax for each row */
1025   for (i=0; i<mbs; i++) {
1026     ailen[i] = imax[i] = ai[i+1] - ai[i];
1027   }
1028   a->nz = ai[mbs];
1029 
1030   /* diagonals may have moved, reset it */
1031   if (a->diag) {
1032     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1033   }
1034   if (fshift && a->nounused == -1) {
1035     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1036   }
1037   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1038   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1039   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1040   a->reallocs          = 0;
1041   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1042   a->idiagvalid = PETSC_FALSE;
1043 
1044   if (A->cmap->n < 65536 && A->cmap->bs == 1) {
1045     if (!a->jshort) {
1046       ierr = PetscMalloc(a->i[A->rmap->n]*sizeof(unsigned short),&a->jshort);CHKERRQ(ierr);
1047       for (i=0; i<a->i[A->rmap->n]; i++) a->jshort[i] = a->j[i];
1048       A->ops->mult  = MatMult_SeqSBAIJ_1_ushort;
1049       A->ops->relax = MatRelax_SeqSBAIJ_1_ushort;
1050     }
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 /*
1056    This function returns an array of flags which indicate the locations of contiguous
1057    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1058    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1059    Assume: sizes should be long enough to hold all the values.
1060 */
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
1063 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1064 {
1065   PetscInt   i,j,k,row;
1066   PetscTruth flg;
1067 
1068   PetscFunctionBegin;
1069    for (i=0,j=0; i<n; j++) {
1070      row = idx[i];
1071      if (row%bs!=0) { /* Not the begining of a block */
1072        sizes[j] = 1;
1073        i++;
1074      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
1075        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1076        i++;
1077      } else { /* Begining of the block, so check if the complete block exists */
1078        flg = PETSC_TRUE;
1079        for (k=1; k<bs; k++) {
1080          if (row+k != idx[i+k]) { /* break in the block */
1081            flg = PETSC_FALSE;
1082            break;
1083          }
1084        }
1085        if (flg) { /* No break in the bs */
1086          sizes[j] = bs;
1087          i+= bs;
1088        } else {
1089          sizes[j] = 1;
1090          i++;
1091        }
1092      }
1093    }
1094    *bs_max = j;
1095    PetscFunctionReturn(0);
1096 }
1097 
1098 
1099 /* Only add/insert a(i,j) with i<=j (blocks).
1100    Any a(i,j) with i>j input by user is ingored.
1101 */
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
1105 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1106 {
1107   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1110   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
1111   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
1112   PetscInt       ridx,cidx,bs2=a->bs2;
1113   MatScalar      *ap,value,*aa=a->a,*bap;
1114 
1115   PetscFunctionBegin;
1116   for (k=0; k<m; k++) { /* loop over added rows */
1117     row  = im[k];       /* row number */
1118     brow = row/bs;      /* block row number */
1119     if (row < 0) continue;
1120 #if defined(PETSC_USE_DEBUG)
1121     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
1122 #endif
1123     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
1124     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
1125     rmax = imax[brow];  /* maximum space allocated for this row */
1126     nrow = ailen[brow]; /* actual length of this row */
1127     low  = 0;
1128 
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       if (in[l] < 0) continue;
1131 #if defined(PETSC_USE_DEBUG)
1132       if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1);
1133 #endif
1134       col = in[l];
1135       bcol = col/bs;              /* block col number */
1136 
1137       if (brow > bcol) {
1138         if (a->ignore_ltriangular){
1139           continue; /* ignore lower triangular values */
1140         } else {
1141           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
1142         }
1143       }
1144 
1145       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
1146       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
1147         /* element value a(k,l) */
1148         if (roworiented) {
1149           value = v[l + k*n];
1150         } else {
1151           value = v[k + l*m];
1152         }
1153 
1154         /* move pointer bap to a(k,l) quickly and add/insert value */
1155         if (col <= lastcol) low = 0; high = nrow;
1156         lastcol = col;
1157         while (high-low > 7) {
1158           t = (low+high)/2;
1159           if (rp[t] > bcol) high = t;
1160           else              low  = t;
1161         }
1162         for (i=low; i<high; i++) {
1163           if (rp[i] > bcol) break;
1164           if (rp[i] == bcol) {
1165             bap  = ap +  bs2*i + bs*cidx + ridx;
1166             if (is == ADD_VALUES) *bap += value;
1167             else                  *bap  = value;
1168             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
1169             if (brow == bcol && ridx < cidx){
1170               bap  = ap +  bs2*i + bs*ridx + cidx;
1171               if (is == ADD_VALUES) *bap += value;
1172               else                  *bap  = value;
1173             }
1174             goto noinsert1;
1175           }
1176         }
1177 
1178         if (nonew == 1) goto noinsert1;
1179         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1180         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1181 
1182         N = nrow++ - 1; high++;
1183         /* shift up all the later entries in this row */
1184         for (ii=N; ii>=i; ii--) {
1185           rp[ii+1] = rp[ii];
1186           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1187         }
1188         if (N>=i) {
1189           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1190         }
1191         rp[i]                      = bcol;
1192         ap[bs2*i + bs*cidx + ridx] = value;
1193       noinsert1:;
1194         low = i;
1195       }
1196     }   /* end of loop over added columns */
1197     ailen[brow] = nrow;
1198   }   /* end of loop over added rows */
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1204 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info)
1205 {
1206   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1207   Mat            outA;
1208   PetscErrorCode ierr;
1209   PetscTruth     row_identity;
1210 
1211   PetscFunctionBegin;
1212   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
1213   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1214   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
1215   if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
1216 
1217   outA        = inA;
1218   inA->factor = MAT_FACTOR_ICC;
1219 
1220   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1221   ierr = MatSeqSBAIJSetNumericFactorization(inA,row_identity);CHKERRQ(ierr);
1222 
1223   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1224   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1225   a->row = row;
1226   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1227   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1228   a->col = row;
1229 
1230   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
1231   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
1232   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1233 
1234   if (!a->solve_work) {
1235     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1236     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1237   }
1238 
1239   ierr = MatCholeskyFactorNumeric(outA,inA,info);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 EXTERN_C_BEGIN
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1246 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1247 {
1248   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1249   PetscInt     i,nz,n;
1250 
1251   PetscFunctionBegin;
1252   nz = baij->maxnz;
1253   n  = mat->cmap->n;
1254   for (i=0; i<nz; i++) {
1255     baij->j[i] = indices[i];
1256   }
1257    baij->nz = nz;
1258    for (i=0; i<n; i++) {
1259      baij->ilen[i] = baij->imax[i];
1260    }
1261    PetscFunctionReturn(0);
1262 }
1263 EXTERN_C_END
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1267 /*@
1268   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1269   in the matrix.
1270 
1271   Input Parameters:
1272   +  mat     - the SeqSBAIJ matrix
1273   -  indices - the column indices
1274 
1275   Level: advanced
1276 
1277   Notes:
1278   This can be called if you have precomputed the nonzero structure of the
1279   matrix and want to provide it to the matrix object to improve the performance
1280   of the MatSetValues() operation.
1281 
1282   You MUST have set the correct numbers of nonzeros per row in the call to
1283   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1284 
1285   MUST be called before any calls to MatSetValues()
1286 
1287   .seealso: MatCreateSeqSBAIJ
1288 @*/
1289 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1290 {
1291   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1292 
1293   PetscFunctionBegin;
1294   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1295   PetscValidPointer(indices,2);
1296   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1297   if (f) {
1298     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1299   } else {
1300     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1301   }
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1307 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1308 {
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   /* If the two matrices have the same copy implementation, use fast copy. */
1313   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1314     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1315     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1316 
1317     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
1318       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1319     }
1320     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
1321   } else {
1322     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1323     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1324     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1331 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1332 {
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1342 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1343 {
1344   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1345   PetscFunctionBegin;
1346   *array = a->a;
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1352 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1353 {
1354   PetscFunctionBegin;
1355   PetscFunctionReturn(0);
1356  }
1357 
1358 #include "petscblaslapack.h"
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1361 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1362 {
1363   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1364   PetscErrorCode ierr;
1365   PetscInt       i,bs=Y->rmap->bs,bs2,j;
1366   PetscBLASInt   one = 1,bnz = PetscBLASIntCast(x->nz);
1367 
1368   PetscFunctionBegin;
1369   if (str == SAME_NONZERO_PATTERN) {
1370     PetscScalar alpha = a;
1371     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1372   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1373     if (y->xtoy && y->XtoY != X) {
1374       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1375       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1376     }
1377     if (!y->xtoy) { /* get xtoy */
1378       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1379       y->XtoY = X;
1380     }
1381     bs2 = bs*bs;
1382     for (i=0; i<x->nz; i++) {
1383       j = 0;
1384       while (j < bs2){
1385         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1386         j++;
1387       }
1388     }
1389     ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1390   } else {
1391     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1392     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1393     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1400 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1401 {
1402   PetscFunctionBegin;
1403   *flg = PETSC_TRUE;
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1409 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1410 {
1411    PetscFunctionBegin;
1412    *flg = PETSC_TRUE;
1413    PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1418 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1419  {
1420    PetscFunctionBegin;
1421    *flg = PETSC_FALSE;
1422    PetscFunctionReturn(0);
1423  }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1427 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1428 {
1429   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1430   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1431   MatScalar      *aa = a->a;
1432 
1433   PetscFunctionBegin;
1434   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1440 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1441 {
1442   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1443   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1444   MatScalar      *aa = a->a;
1445 
1446   PetscFunctionBegin;
1447   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 /* -------------------------------------------------------------------*/
1452 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1453        MatGetRow_SeqSBAIJ,
1454        MatRestoreRow_SeqSBAIJ,
1455        MatMult_SeqSBAIJ_N,
1456 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1457        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1458        MatMultAdd_SeqSBAIJ_N,
1459        0,
1460        0,
1461        0,
1462 /*10*/ 0,
1463        0,
1464        MatCholeskyFactor_SeqSBAIJ,
1465        MatRelax_SeqSBAIJ,
1466        MatTranspose_SeqSBAIJ,
1467 /*15*/ MatGetInfo_SeqSBAIJ,
1468        MatEqual_SeqSBAIJ,
1469        MatGetDiagonal_SeqSBAIJ,
1470        MatDiagonalScale_SeqSBAIJ,
1471        MatNorm_SeqSBAIJ,
1472 /*20*/ 0,
1473        MatAssemblyEnd_SeqSBAIJ,
1474        MatSetOption_SeqSBAIJ,
1475        MatZeroEntries_SeqSBAIJ,
1476 /*24*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*29*/ MatSetUpPreallocation_SeqSBAIJ,
1482        0,
1483        0,
1484        MatGetArray_SeqSBAIJ,
1485        MatRestoreArray_SeqSBAIJ,
1486 /*34*/ MatDuplicate_SeqSBAIJ,
1487        0,
1488        0,
1489        0,
1490        MatICCFactor_SeqSBAIJ,
1491 /*39*/ MatAXPY_SeqSBAIJ,
1492        MatGetSubMatrices_SeqSBAIJ,
1493        MatIncreaseOverlap_SeqSBAIJ,
1494        MatGetValues_SeqSBAIJ,
1495        MatCopy_SeqSBAIJ,
1496 /*44*/ 0,
1497        MatScale_SeqSBAIJ,
1498        0,
1499        0,
1500        0,
1501 /*49*/ 0,
1502        MatGetRowIJ_SeqSBAIJ,
1503        MatRestoreRowIJ_SeqSBAIJ,
1504        0,
1505        0,
1506 /*54*/ 0,
1507        0,
1508        0,
1509        0,
1510        MatSetValuesBlocked_SeqSBAIJ,
1511 /*59*/ MatGetSubMatrix_SeqSBAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*64*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*69*/ MatGetRowMaxAbs_SeqSBAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*74*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*79*/ 0,
1532        0,
1533        0,
1534 #if !defined(PETSC_USE_COMPLEX)
1535        MatGetInertia_SeqSBAIJ,
1536 #else
1537        0,
1538 #endif
1539        MatLoad_SeqSBAIJ,
1540 /*84*/ MatIsSymmetric_SeqSBAIJ,
1541        MatIsHermitian_SeqSBAIJ,
1542        MatIsStructurallySymmetric_SeqSBAIJ,
1543        0,
1544        0,
1545 /*89*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*94*/ 0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*99*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*104*/0,
1561        MatRealPart_SeqSBAIJ,
1562        MatImaginaryPart_SeqSBAIJ,
1563        MatGetRowUpperTriangular_SeqSBAIJ,
1564        MatRestoreRowUpperTriangular_SeqSBAIJ,
1565 /*109*/0,
1566        0,
1567        0,
1568        0,
1569        MatMissingDiagonal_SeqSBAIJ
1570 };
1571 
1572 EXTERN_C_BEGIN
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1575 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1576 {
1577   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1578   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1579   PetscErrorCode ierr;
1580 
1581   PetscFunctionBegin;
1582   if (aij->nonew != 1) {
1583     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1584   }
1585 
1586   /* allocate space for values if not already there */
1587   if (!aij->saved_values) {
1588     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1589   }
1590 
1591   /* copy values over */
1592   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1600 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1601 {
1602   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1603   PetscErrorCode ierr;
1604   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1605 
1606   PetscFunctionBegin;
1607   if (aij->nonew != 1) {
1608     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1609   }
1610   if (!aij->saved_values) {
1611     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1612   }
1613 
1614   /* copy values over */
1615   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1616   PetscFunctionReturn(0);
1617 }
1618 EXTERN_C_END
1619 
1620 EXTERN_C_BEGIN
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1623 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1624 {
1625   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1626   PetscErrorCode ierr;
1627   PetscInt       i,mbs,bs2, newbs = PetscAbs(bs);
1628   PetscTruth     skipallocation = PETSC_FALSE,flg = PETSC_FALSE;
1629 
1630   PetscFunctionBegin;
1631   B->preallocated = PETSC_TRUE;
1632   if (bs < 0) {
1633     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1634       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1635     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1636     bs   = PetscAbs(bs);
1637   }
1638   if (nnz && newbs != bs) {
1639     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
1640   }
1641   bs = newbs;
1642 
1643   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1644   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1645   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1646   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1647 
1648   mbs  = B->rmap->N/bs;
1649   bs2  = bs*bs;
1650 
1651   if (mbs*bs != B->rmap->N) {
1652     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1653   }
1654 
1655   if (nz == MAT_SKIP_ALLOCATION) {
1656     skipallocation = PETSC_TRUE;
1657     nz             = 0;
1658   }
1659 
1660   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1661   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1662   if (nnz) {
1663     for (i=0; i<mbs; i++) {
1664       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1665       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1666     }
1667   }
1668 
1669   B->ops->mult             = MatMult_SeqSBAIJ_N;
1670   B->ops->multadd          = MatMultAdd_SeqSBAIJ_N;
1671   B->ops->multtranspose    = MatMult_SeqSBAIJ_N;
1672   B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N;
1673   ierr  = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1674   if (!flg) {
1675     switch (bs) {
1676     case 1:
1677       B->ops->mult             = MatMult_SeqSBAIJ_1;
1678       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1679       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1680       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1681       break;
1682     case 2:
1683       B->ops->mult             = MatMult_SeqSBAIJ_2;
1684       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1685       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1686       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1687       break;
1688     case 3:
1689       B->ops->mult             = MatMult_SeqSBAIJ_3;
1690       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1691       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1692       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1693       break;
1694     case 4:
1695       B->ops->mult             = MatMult_SeqSBAIJ_4;
1696       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1697       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1698       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1699       break;
1700     case 5:
1701       B->ops->mult             = MatMult_SeqSBAIJ_5;
1702       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1703       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1704       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1705       break;
1706     case 6:
1707       B->ops->mult             = MatMult_SeqSBAIJ_6;
1708       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1709       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1710       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1711       break;
1712     case 7:
1713       B->ops->mult             = MatMult_SeqSBAIJ_7;
1714       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1715       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1716       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1717       break;
1718     }
1719   }
1720 
1721   b->mbs = mbs;
1722   b->nbs = mbs;
1723   if (!skipallocation) {
1724     if (!b->imax) {
1725       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1726       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
1727     }
1728     if (!nnz) {
1729       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1730       else if (nz <= 0)        nz = 1;
1731       for (i=0; i<mbs; i++) {
1732         b->imax[i] = nz;
1733       }
1734       nz = nz*mbs; /* total nz */
1735     } else {
1736       nz = 0;
1737       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1738     }
1739     /* b->ilen will count nonzeros in each block row so far. */
1740     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1741     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1742 
1743     /* allocate the matrix space */
1744     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
1745     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
1746     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1747     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1748     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1749     b->singlemalloc = PETSC_TRUE;
1750 
1751     /* pointer to beginning of each row */
1752     b->i[0] = 0;
1753     for (i=1; i<mbs+1; i++) {
1754       b->i[i] = b->i[i-1] + b->imax[i-1];
1755     }
1756     b->free_a     = PETSC_TRUE;
1757     b->free_ij    = PETSC_TRUE;
1758   } else {
1759     b->free_a     = PETSC_FALSE;
1760     b->free_ij    = PETSC_FALSE;
1761   }
1762 
1763   B->rmap->bs               = bs;
1764   b->bs2              = bs2;
1765   b->nz             = 0;
1766   b->maxnz          = nz*bs2;
1767 
1768   b->inew             = 0;
1769   b->jnew             = 0;
1770   b->anew             = 0;
1771   b->a2anew           = 0;
1772   b->permute          = PETSC_FALSE;
1773   PetscFunctionReturn(0);
1774 }
1775 EXTERN_C_END
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
1779 /*
1780    This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization
1781 */
1782 PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural)
1783 {
1784   PetscErrorCode ierr;
1785   PetscTruth     flg = PETSC_FALSE;
1786   PetscInt       bs = B->rmap->bs;
1787 
1788   PetscFunctionBegin;
1789   ierr    = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1790   if (flg) bs = 8;
1791 
1792   if (!natural) {
1793     switch (bs) {
1794     case 1:
1795       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1796       break;
1797     case 2:
1798       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1799       break;
1800     case 3:
1801       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1802       break;
1803     case 4:
1804       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1805       break;
1806     case 5:
1807       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1808       break;
1809     case 6:
1810       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1811       break;
1812     case 7:
1813       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1814       break;
1815     default:
1816       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N;
1817       break;
1818     }
1819   } else {
1820     switch (bs) {
1821     case 1:
1822       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1823       break;
1824     case 2:
1825       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1826       break;
1827     case 3:
1828       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1829       break;
1830     case 4:
1831       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1832       break;
1833     case 5:
1834       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1835       break;
1836     case 6:
1837       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1838       break;
1839     case 7:
1840       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1841       break;
1842     default:
1843       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1844       break;
1845     }
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 EXTERN_C_BEGIN
1851 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1852 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1853 EXTERN_C_END
1854 
1855 
1856 EXTERN_C_BEGIN
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc"
1859 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
1860 {
1861   PetscInt           n = A->rmap->n;
1862   PetscErrorCode     ierr;
1863 
1864   PetscFunctionBegin;
1865   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
1866   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
1867   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1868     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
1869     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1870     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ;
1871     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqSBAIJ;
1872   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
1873   (*B)->factor = ftype;
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 EXTERN_C_BEGIN
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatGetFactorAvailable_seqsbaij_petsc"
1881 PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
1882 {
1883   PetscFunctionBegin;
1884   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1885     *flg = PETSC_TRUE;
1886   } else {
1887     *flg = PETSC_FALSE;
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 EXTERN_C_END
1892 
1893 EXTERN_C_BEGIN
1894 #if defined(PETSC_HAVE_MUMPS)
1895 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*);
1896 #endif
1897 #if defined(PETSC_HAVE_SPOOLES)
1898 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*);
1899 #endif
1900 #if defined(PETSC_HAVE_PASTIX)
1901 extern PetscErrorCode MatGetFactor_seqsbaij_pastix(Mat,MatFactorType,Mat*);
1902 #endif
1903 EXTERN_C_END
1904 
1905 /*MC
1906   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1907   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1908 
1909   Options Database Keys:
1910   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1911 
1912   Level: beginner
1913 
1914   .seealso: MatCreateSeqSBAIJ
1915 M*/
1916 
1917 EXTERN_C_BEGIN
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1920 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1921 {
1922   Mat_SeqSBAIJ   *b;
1923   PetscErrorCode ierr;
1924   PetscMPIInt    size;
1925   PetscTruth     no_unroll = PETSC_FALSE,no_inode = PETSC_FALSE;
1926 
1927   PetscFunctionBegin;
1928   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
1929   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1930 
1931   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1932   B->data = (void*)b;
1933   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1934   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1935   B->ops->view        = MatView_SeqSBAIJ;
1936   B->mapping          = 0;
1937   b->row              = 0;
1938   b->icol             = 0;
1939   b->reallocs         = 0;
1940   b->saved_values     = 0;
1941   b->inode.limit      = 5;
1942   b->inode.max_limit  = 5;
1943 
1944   b->roworiented      = PETSC_TRUE;
1945   b->nonew            = 0;
1946   b->diag             = 0;
1947   b->solve_work       = 0;
1948   b->mult_work        = 0;
1949   B->spptr            = 0;
1950   b->keepnonzeropattern   = PETSC_FALSE;
1951   b->xtoy             = 0;
1952   b->XtoY             = 0;
1953 
1954   b->inew             = 0;
1955   b->jnew             = 0;
1956   b->anew             = 0;
1957   b->a2anew           = 0;
1958   b->permute          = PETSC_FALSE;
1959 
1960   b->ignore_ltriangular = PETSC_FALSE;
1961   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&b->ignore_ltriangular,PETSC_NULL);CHKERRQ(ierr);
1962 
1963   b->getrow_utriangular = PETSC_FALSE;
1964   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&b->getrow_utriangular,PETSC_NULL);CHKERRQ(ierr);
1965 
1966 #if defined(PETSC_HAVE_PASTIX)
1967   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C",
1968 					   "MatGetFactor_seqsbaij_pastix",
1969 					   MatGetFactor_seqsbaij_pastix);CHKERRQ(ierr);
1970 #endif
1971 #if defined(PETSC_HAVE_SPOOLES)
1972   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C",
1973                                      "MatGetFactor_seqsbaij_spooles",
1974                                      MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr);
1975 #endif
1976 #if defined(PETSC_HAVE_MUMPS)
1977   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C",
1978                                      "MatGetFactor_seqsbaij_mumps",
1979                                      MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr);
1980 #endif
1981   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C",
1982                                      "MatGetFactorAvailable_seqsbaij_petsc",
1983                                      MatGetFactorAvailable_seqsbaij_petsc);CHKERRQ(ierr);
1984   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C",
1985                                      "MatGetFactor_seqsbaij_petsc",
1986                                      MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr);
1987   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1988                                      "MatStoreValues_SeqSBAIJ",
1989                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1990   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1991                                      "MatRetrieveValues_SeqSBAIJ",
1992                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1993   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1994                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1995                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1997                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1998                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1999   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
2000                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
2001                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
2002   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
2003                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
2004                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
2005 
2006   B->symmetric                  = PETSC_TRUE;
2007   B->structurally_symmetric     = PETSC_TRUE;
2008   B->symmetric_set              = PETSC_TRUE;
2009   B->structurally_symmetric_set = PETSC_TRUE;
2010   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
2011 
2012   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for SEQSBAIJ matrix","Mat");CHKERRQ(ierr);
2013     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for inodes (slower)",PETSC_NULL,no_unroll,&no_unroll,PETSC_NULL);CHKERRQ(ierr);
2014     if (no_unroll) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_unroll\n");CHKERRQ(ierr);}
2015     ierr = PetscOptionsTruth("-mat_no_inode","Do not optimize for inodes (slower)",PETSC_NULL,no_inode,&no_inode,PETSC_NULL);CHKERRQ(ierr);
2016     if (no_inode) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_inode\n");CHKERRQ(ierr);}
2017     ierr = PetscOptionsInt("-mat_inode_limit","Do not use inodes larger then this value",PETSC_NULL,b->inode.limit,&b->inode.limit,PETSC_NULL);CHKERRQ(ierr);
2018   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2019   b->inode.use = (PetscTruth)(!(no_unroll || no_inode));
2020   if (b->inode.limit > b->inode.max_limit) b->inode.limit = b->inode.max_limit;
2021 
2022   PetscFunctionReturn(0);
2023 }
2024 EXTERN_C_END
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
2028 /*@C
2029    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
2030    compressed row) format.  For good matrix assembly performance the
2031    user should preallocate the matrix storage by setting the parameter nz
2032    (or the array nnz).  By setting these parameters accurately, performance
2033    during matrix assembly can be increased by more than a factor of 50.
2034 
2035    Collective on Mat
2036 
2037    Input Parameters:
2038 +  A - the symmetric matrix
2039 .  bs - size of block
2040 .  nz - number of block nonzeros per block row (same for all rows)
2041 -  nnz - array containing the number of block nonzeros in the upper triangular plus
2042          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
2043 
2044    Options Database Keys:
2045 .   -mat_no_unroll - uses code that does not unroll the loops in the
2046                      block calculations (much slower)
2047 .    -mat_block_size - size of the blocks to use (only works if a negative bs is passed in
2048 
2049    Level: intermediate
2050 
2051    Notes:
2052    Specify the preallocated storage with either nz or nnz (not both).
2053    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2054    allocation.  For additional details, see the users manual chapter on
2055    matrices.
2056 
2057    You can call MatGetInfo() to get information on how effective the preallocation was;
2058    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2059    You can also run with the option -info and look for messages with the string
2060    malloc in them to see if additional memory allocation was needed.
2061 
2062    If the nnz parameter is given then the nz parameter is ignored
2063 
2064 
2065 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2066 @*/
2067 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2068 {
2069   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2070 
2071   PetscFunctionBegin;
2072   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2073   if (f) {
2074     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatCreateSeqSBAIJ"
2081 /*@C
2082    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
2083    compressed row) format.  For good matrix assembly performance the
2084    user should preallocate the matrix storage by setting the parameter nz
2085    (or the array nnz).  By setting these parameters accurately, performance
2086    during matrix assembly can be increased by more than a factor of 50.
2087 
2088    Collective on MPI_Comm
2089 
2090    Input Parameters:
2091 +  comm - MPI communicator, set to PETSC_COMM_SELF
2092 .  bs - size of block
2093 .  m - number of rows, or number of columns
2094 .  nz - number of block nonzeros per block row (same for all rows)
2095 -  nnz - array containing the number of block nonzeros in the upper triangular plus
2096          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
2097 
2098    Output Parameter:
2099 .  A - the symmetric matrix
2100 
2101    Options Database Keys:
2102 .   -mat_no_unroll - uses code that does not unroll the loops in the
2103                      block calculations (much slower)
2104 .    -mat_block_size - size of the blocks to use
2105 
2106    Level: intermediate
2107 
2108    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2109    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2110    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2111 
2112    Notes:
2113    The number of rows and columns must be divisible by blocksize.
2114    This matrix type does not support complex Hermitian operation.
2115 
2116    Specify the preallocated storage with either nz or nnz (not both).
2117    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2118    allocation.  For additional details, see the users manual chapter on
2119    matrices.
2120 
2121    If the nnz parameter is given then the nz parameter is ignored
2122 
2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2124 @*/
2125 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2126 {
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2131   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2132   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2133   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
2139 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2140 {
2141   Mat            C;
2142   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
2143   PetscErrorCode ierr;
2144   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2145 
2146   PetscFunctionBegin;
2147   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2148 
2149   *B = 0;
2150   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2151   ierr = MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2152   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2153   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2154   c    = (Mat_SeqSBAIJ*)C->data;
2155 
2156   C->preallocated       = PETSC_TRUE;
2157   C->factor             = A->factor;
2158   c->row                = 0;
2159   c->icol               = 0;
2160   c->saved_values       = 0;
2161   c->keepnonzeropattern = a->keepnonzeropattern;
2162   C->assembled          = PETSC_TRUE;
2163 
2164   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2165   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2166   c->bs2  = a->bs2;
2167   c->mbs  = a->mbs;
2168   c->nbs  = a->nbs;
2169 
2170   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
2171   for (i=0; i<mbs; i++) {
2172     c->imax[i] = a->imax[i];
2173     c->ilen[i] = a->ilen[i];
2174   }
2175 
2176   /* allocate the matrix space */
2177   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2178   c->singlemalloc = PETSC_TRUE;
2179   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2180   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
2181   if (mbs > 0) {
2182     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2183     if (cpvalues == MAT_COPY_VALUES) {
2184       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2185     } else {
2186       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2187     }
2188     if (a->jshort) {
2189       ierr = PetscMalloc(nz*sizeof(unsigned short),&c->jshort);CHKERRQ(ierr);
2190       ierr = PetscMemcpy(c->jshort,a->jshort,nz*sizeof(unsigned short));CHKERRQ(ierr);
2191     }
2192   }
2193 
2194   c->roworiented = a->roworiented;
2195   c->nonew       = a->nonew;
2196 
2197   if (a->diag) {
2198     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2199     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2200     for (i=0; i<mbs; i++) {
2201       c->diag[i] = a->diag[i];
2202     }
2203   } else c->diag  = 0;
2204   c->nz           = a->nz;
2205   c->maxnz        = a->maxnz;
2206   c->solve_work   = 0;
2207   c->mult_work    = 0;
2208   c->free_a       = PETSC_TRUE;
2209   c->free_ij      = PETSC_TRUE;
2210   *B = C;
2211   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatLoad_SeqSBAIJ"
2217 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2218 {
2219   Mat_SeqSBAIJ   *a;
2220   Mat            B;
2221   PetscErrorCode ierr;
2222   int            fd;
2223   PetscMPIInt    size;
2224   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2225   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
2226   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2227   PetscInt       *masked,nmask,tmp,bs2,ishift;
2228   PetscScalar    *aa;
2229   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2230 
2231   PetscFunctionBegin;
2232   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2233   bs2  = bs*bs;
2234 
2235   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2236   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2237   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2238   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2239   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2240   M = header[1]; N = header[2]; nz = header[3];
2241 
2242   if (header[3] < 0) {
2243     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
2244   }
2245 
2246   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2247 
2248   /*
2249      This code adds extra rows to make sure the number of rows is
2250     divisible by the blocksize
2251   */
2252   mbs        = M/bs;
2253   extra_rows = bs - M + bs*(mbs);
2254   if (extra_rows == bs) extra_rows = 0;
2255   else                  mbs++;
2256   if (extra_rows) {
2257     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2258   }
2259 
2260   /* read in row lengths */
2261   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2262   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2263   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2264 
2265   /* read in column indices */
2266   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2267   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2268   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2269 
2270   /* loop over row lengths determining block row lengths */
2271   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
2272   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2273   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2274   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2275   masked   = mask + mbs;
2276   rowcount = 0; nzcount = 0;
2277   for (i=0; i<mbs; i++) {
2278     nmask = 0;
2279     for (j=0; j<bs; j++) {
2280       kmax = rowlengths[rowcount];
2281       for (k=0; k<kmax; k++) {
2282         tmp = jj[nzcount++]/bs;   /* block col. index */
2283         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
2284       }
2285       rowcount++;
2286     }
2287     s_browlengths[i] += nmask;
2288 
2289     /* zero out the mask elements we set */
2290     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2291   }
2292 
2293   /* create our matrix */
2294   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
2295   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2296   ierr = MatSetType(B,type);CHKERRQ(ierr);
2297   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
2298   a = (Mat_SeqSBAIJ*)B->data;
2299 
2300   /* set matrix "i" values */
2301   a->i[0] = 0;
2302   for (i=1; i<= mbs; i++) {
2303     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
2304     a->ilen[i-1] = s_browlengths[i-1];
2305   }
2306   a->nz = a->i[mbs];
2307 
2308   /* read in nonzero values */
2309   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2310   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2311   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2312 
2313   /* set "a" and "j" values into matrix */
2314   nzcount = 0; jcount = 0;
2315   for (i=0; i<mbs; i++) {
2316     nzcountb = nzcount;
2317     nmask    = 0;
2318     for (j=0; j<bs; j++) {
2319       kmax = rowlengths[i*bs+j];
2320       for (k=0; k<kmax; k++) {
2321         tmp = jj[nzcount++]/bs; /* block col. index */
2322         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
2323       }
2324     }
2325     /* sort the masked values */
2326     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2327 
2328     /* set "j" values into matrix */
2329     maskcount = 1;
2330     for (j=0; j<nmask; j++) {
2331       a->j[jcount++]  = masked[j];
2332       mask[masked[j]] = maskcount++;
2333     }
2334 
2335     /* set "a" values into matrix */
2336     ishift = bs2*a->i[i];
2337     for (j=0; j<bs; j++) {
2338       kmax = rowlengths[i*bs+j];
2339       for (k=0; k<kmax; k++) {
2340         tmp       = jj[nzcountb]/bs ; /* block col. index */
2341         if (tmp >= i){
2342           block     = mask[tmp] - 1;
2343           point     = jj[nzcountb] - bs*tmp;
2344           idx       = ishift + bs2*block + j + bs*point;
2345           a->a[idx] = aa[nzcountb];
2346         }
2347         nzcountb++;
2348       }
2349     }
2350     /* zero out the mask elements we set */
2351     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2352   }
2353   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2354 
2355   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2356   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
2357   ierr = PetscFree(aa);CHKERRQ(ierr);
2358   ierr = PetscFree(jj);CHKERRQ(ierr);
2359   ierr = PetscFree(mask);CHKERRQ(ierr);
2360 
2361   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2363   ierr = MatView_Private(B);CHKERRQ(ierr);
2364   *A = B;
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2370 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2371 {
2372   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
2373   const MatScalar *aa=a->a,*v,*v1,*aidiag;
2374   PetscScalar     *x,*b,*t,sum;
2375   MatScalar       tmp;
2376   PetscErrorCode  ierr;
2377   PetscInt        m=a->mbs,bs=A->rmap->bs,j;
2378   const PetscInt  *ai=a->i,*aj=a->j,*vj,*vj1;
2379   PetscInt        nz,nz1,i;
2380 
2381   PetscFunctionBegin;
2382   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
2383 
2384   its = its*lits;
2385   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2386 
2387   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2388 
2389   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2390   if (xx != bb) {
2391     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2392   } else {
2393     b = x;
2394   }
2395 
2396   if (!a->idiagvalid) {
2397     if (!a->idiag) {
2398       ierr     = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
2399     }
2400     for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]];
2401     a->idiagvalid = PETSC_TRUE;
2402   }
2403 
2404   if (!a->relax_work) {
2405     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
2406   }
2407   t = a->relax_work;
2408 
2409   aidiag = a->idiag;
2410 
2411   if (flag == SOR_APPLY_UPPER) {
2412     /* apply (U + D/omega) to the vector */
2413     PetscScalar d;
2414     for (i=0; i<m; i++) {
2415       d    = fshift + aa[ai[i]];
2416       nz   = ai[i+1] - ai[i] - 1;
2417       vj   = aj + ai[i] + 1;
2418       v    = aa + ai[i] + 1;
2419       sum  = b[i]*d/omega;
2420       PetscSparseDensePlusDot(sum,b,v,vj,nz);
2421       x[i] = sum;
2422     }
2423     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
2424   }
2425 
2426   if (flag & SOR_ZERO_INITIAL_GUESS) {
2427     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2428       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2429 
2430       v  = aa + 1;
2431       vj = aj + 1;
2432       for (i=0; i<m; i++){
2433         nz = ai[i+1] - ai[i] - 1;
2434         tmp = - (x[i] = omega*t[i]*aidiag[i]);
2435         for (j=0; j<nz; j++) {
2436           t[vj[j]] += tmp*v[j];
2437         }
2438         v  += nz + 1;
2439         vj += nz + 1;
2440       }
2441       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
2442     }
2443 
2444     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2445       int nz2;
2446       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2447         t = b;
2448       }
2449 
2450       v  = aa + ai[m-1] + 1;
2451       vj = aj + ai[m-1] + 1;
2452       nz = 0;
2453       for (i=m-1; i>=0; i--){
2454         sum = 0.0;
2455         nz2 = ai[i] - ai[i-1] - 1;
2456 	PETSC_Prefetch(v-nz2-1,0,1);
2457 	PETSC_Prefetch(vj-nz2-1,0,1);
2458         PetscSparseDensePlusDot(sum,x,v,vj,nz);
2459         sum = t[i] - sum;
2460         if (t == b) {
2461 	  x[i] = omega*sum*aidiag[i];
2462         } else {
2463 	  x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
2464         }
2465         nz  = nz2;
2466         v  -= nz + 1;
2467         vj -= nz + 1;
2468       }
2469       t = a->relax_work;
2470       ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
2471     }
2472     its--;
2473   }
2474 
2475   while (its--) {
2476     /*
2477        forward sweep:
2478        for i=0,...,m-1:
2479          sum[i] = (b[i] - U(i,:)x )/d[i];
2480          x[i]   = (1-omega)x[i] + omega*sum[i];
2481          b      = b - x[i]*U^T(i,:);
2482 
2483     */
2484     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2485       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2486 
2487       for (i=0; i<m; i++){
2488         v  = aa + ai[i] + 1; v1=v;
2489         vj = aj + ai[i] + 1; vj1=vj;
2490         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2491         sum = t[i];
2492         ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr);
2493         while (nz1--) sum -= (*v1++)*x[*vj1++];
2494         x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
2495         while (nz--) t[*vj++] -= x[i]*(*v++);
2496       }
2497     }
2498 
2499     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2500       /*
2501        backward sweep:
2502        b = b - x[i]*U^T(i,:), i=0,...,n-2
2503        for i=m-1,...,0:
2504          sum[i] = (b[i] - U(i,:)x )/d[i];
2505          x[i]   = (1-omega)x[i] + omega*sum[i];
2506       */
2507       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2508       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2509 
2510       for (i=0; i<m-1; i++){  /* update rhs */
2511         v  = aa + ai[i] + 1;
2512         vj = aj + ai[i] + 1;
2513         nz = ai[i+1] - ai[i] - 1;
2514         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
2515         while (nz--) t[*vj++] -= x[i]*(*v++);
2516       }
2517       for (i=m-1; i>=0; i--){
2518         v  = aa + ai[i] + 1;
2519         vj = aj + ai[i] + 1;
2520         nz = ai[i+1] - ai[i] - 1;
2521         ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr);
2522         sum = t[i];
2523         while (nz--) sum -= x[*vj++]*(*v++);
2524         x[i] =   (1-omega)*x[i] + omega*sum*aidiag[i];
2525       }
2526     }
2527   }
2528 
2529   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2530   if (bb != xx) {
2531     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2532   }
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2538 /*@
2539      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2540               (upper triangular entries in CSR format) provided by the user.
2541 
2542      Collective on MPI_Comm
2543 
2544    Input Parameters:
2545 +  comm - must be an MPI communicator of size 1
2546 .  bs - size of block
2547 .  m - number of rows
2548 .  n - number of columns
2549 .  i - row indices
2550 .  j - column indices
2551 -  a - matrix values
2552 
2553    Output Parameter:
2554 .  mat - the matrix
2555 
2556    Level: intermediate
2557 
2558    Notes:
2559        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2560     once the matrix is destroyed
2561 
2562        You cannot set new nonzero locations into this matrix, that will generate an error.
2563 
2564        The i and j indices are 0 based
2565 
2566 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2567 
2568 @*/
2569 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2570 {
2571   PetscErrorCode ierr;
2572   PetscInt       ii;
2573   Mat_SeqSBAIJ   *sbaij;
2574 
2575   PetscFunctionBegin;
2576   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2577   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2578 
2579   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2580   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2581   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2582   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2583   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2584   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2585 
2586   sbaij->i = i;
2587   sbaij->j = j;
2588   sbaij->a = a;
2589   sbaij->singlemalloc = PETSC_FALSE;
2590   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2591   sbaij->free_a       = PETSC_FALSE;
2592   sbaij->free_ij      = PETSC_FALSE;
2593 
2594   for (ii=0; ii<m; ii++) {
2595     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2596 #if defined(PETSC_USE_DEBUG)
2597     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2598 #endif
2599   }
2600 #if defined(PETSC_USE_DEBUG)
2601   for (ii=0; ii<sbaij->i[m]; ii++) {
2602     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2603     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2604   }
2605 #endif
2606 
2607   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2608   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 
2613 
2614 
2615 
2616