xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 4da8f2458232f57198ffbca67ee6b4144f494936)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "../src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #include "../src/mat/impls/sbaij/seq/relax.h"
11 #define USESHORT
12 #include "../src/mat/impls/sbaij/seq/relax.h"
13 
14 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth);
15 #define CHUNKSIZE  10
16 
17 
18 /*
19      Checks for missing diagonals
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
23 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd)
24 {
25   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
26   PetscErrorCode ierr;
27   PetscInt       *diag,*jj = a->j,i;
28 
29   PetscFunctionBegin;
30   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
31   diag = a->diag;
32   *missing = PETSC_FALSE;
33   for (i=0; i<a->mbs; i++) {
34     if (jj[diag[i]] != i) {
35       *missing    = PETSC_TRUE;
36       if (dd) *dd = i;
37       break;
38     }
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
45 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
46 {
47   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
48   PetscErrorCode ierr;
49   PetscInt       i;
50 
51   PetscFunctionBegin;
52   if (!a->diag) {
53     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
54   }
55   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
61 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
62 {
63   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
64   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   *nn = n;
69   if (!ia) PetscFunctionReturn(0);
70   if (!blockcompressed) {
71     /* malloc & create the natural set of indices */
72     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
73     for (i=0; i<n+1; i++) {
74       for (j=0; j<bs; j++) {
75         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
76       }
77     }
78     for (i=0; i<nz; i++) {
79       for (j=0; j<bs; j++) {
80         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
81       }
82     }
83   } else { /* blockcompressed */
84     if (oshift == 1) {
85       /* temporarily add 1 to i and j indices */
86       for (i=0; i<nz; i++) a->j[i]++;
87       for (i=0; i<n+1; i++) a->i[i]++;
88     }
89     *ia = a->i; *ja = a->j;
90   }
91 
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
97 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
98 {
99   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
100   PetscInt     i,n = a->mbs,nz = a->i[n];
101   PetscErrorCode ierr;
102 
103   PetscFunctionBegin;
104   if (!ia) PetscFunctionReturn(0);
105 
106   if (!blockcompressed) {
107     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
108   } else if (oshift == 1) { /* blockcompressed */
109     for (i=0; i<nz; i++) a->j[i]--;
110     for (i=0; i<n+1; i++) a->i[i]--;
111   }
112 
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
118 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
119 {
120   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124 #if defined(PETSC_USE_LOG)
125   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz);
126 #endif
127   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
128   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
129   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
130   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
131   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
132   if (a->inode.size) {ierr = PetscFree(a->inode.size);CHKERRQ(ierr);}
133   ierr = PetscFree(a->diag);CHKERRQ(ierr);
134   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
135   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
136   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
137   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
138   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
139   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
140   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
141   if (a->free_jshort) {ierr = PetscFree(a->jshort);CHKERRQ(ierr);}
142   ierr = PetscFree(a->inew);CHKERRQ(ierr);
143   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
144   ierr = PetscFree(a);CHKERRQ(ierr);
145 
146   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
147   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
148   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
149   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
150   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
151   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
152   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
158 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
159 {
160   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
161   PetscErrorCode ierr;
162 
163   PetscFunctionBegin;
164   switch (op) {
165   case MAT_ROW_ORIENTED:
166     a->roworiented = flg;
167     break;
168   case MAT_KEEP_NONZERO_PATTERN:
169     a->keepnonzeropattern = flg;
170     break;
171   case MAT_NEW_NONZERO_LOCATIONS:
172     a->nonew = (flg ? 0 : 1);
173     break;
174   case MAT_NEW_NONZERO_LOCATION_ERR:
175     a->nonew = (flg ? -1 : 0);
176     break;
177   case MAT_NEW_NONZERO_ALLOCATION_ERR:
178     a->nonew = (flg ? -2 : 0);
179     break;
180   case MAT_UNUSED_NONZERO_LOCATION_ERR:
181     a->nounused = (flg ? -1 : 0);
182     break;
183   case MAT_NEW_DIAGONALS:
184   case MAT_IGNORE_OFF_PROC_ENTRIES:
185   case MAT_USE_HASH_TABLE:
186     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
187     break;
188   case MAT_HERMITIAN:
189     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
190   case MAT_SYMMETRIC:
191   case MAT_STRUCTURALLY_SYMMETRIC:
192   case MAT_SYMMETRY_ETERNAL:
193     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
194     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
195     break;
196   case MAT_IGNORE_LOWER_TRIANGULAR:
197     a->ignore_ltriangular = flg;
198     break;
199   case MAT_ERROR_LOWER_TRIANGULAR:
200     a->ignore_ltriangular = flg;
201     break;
202   case MAT_GETROW_UPPERTRIANGULAR:
203     a->getrow_utriangular = flg;
204     break;
205   default:
206     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
207   }
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
213 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
214 {
215   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
216   PetscErrorCode ierr;
217   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
218   MatScalar      *aa,*aa_i;
219   PetscScalar    *v_i;
220 
221   PetscFunctionBegin;
222   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
223   /* Get the upper triangular part of the row */
224   bs  = A->rmap->bs;
225   ai  = a->i;
226   aj  = a->j;
227   aa  = a->a;
228   bs2 = a->bs2;
229 
230   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
231 
232   bn  = row/bs;   /* Block number */
233   bp  = row % bs; /* Block position */
234   M   = ai[bn+1] - ai[bn];
235   *ncols = bs*M;
236 
237   if (v) {
238     *v = 0;
239     if (*ncols) {
240       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
241       for (i=0; i<M; i++) { /* for each block in the block row */
242         v_i  = *v + i*bs;
243         aa_i = aa + bs2*(ai[bn] + i);
244         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
245       }
246     }
247   }
248 
249   if (cols) {
250     *cols = 0;
251     if (*ncols) {
252       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
253       for (i=0; i<M; i++) { /* for each block in the block row */
254         cols_i = *cols + i*bs;
255         itmp  = bs*aj[ai[bn] + i];
256         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
257       }
258     }
259   }
260 
261   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
262   /* this segment is currently removed, so only entries in the upper triangle are obtained */
263 #ifdef column_search
264   v_i    = *v    + M*bs;
265   cols_i = *cols + M*bs;
266   for (i=0; i<bn; i++){ /* for each block row */
267     M = ai[i+1] - ai[i];
268     for (j=0; j<M; j++){
269       itmp = aj[ai[i] + j];    /* block column value */
270       if (itmp == bn){
271         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
272         for (k=0; k<bs; k++) {
273           *cols_i++ = i*bs+k;
274           *v_i++    = aa_i[k];
275         }
276         *ncols += bs;
277         break;
278       }
279     }
280   }
281 #endif
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
287 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
288 {
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
293   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
299 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
300 {
301   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
302 
303   PetscFunctionBegin;
304   a->getrow_utriangular = PETSC_TRUE;
305   PetscFunctionReturn(0);
306 }
307 #undef __FUNCT__
308 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
309 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
310 {
311   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
312 
313   PetscFunctionBegin;
314   a->getrow_utriangular = PETSC_FALSE;
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
320 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B)
321 {
322   PetscErrorCode ierr;
323   PetscFunctionBegin;
324   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
325     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
326   }
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
332 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
333 {
334   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
335   PetscErrorCode    ierr;
336   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
337   const char        *name;
338   PetscViewerFormat format;
339 
340   PetscFunctionBegin;
341   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
342   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
343   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
344     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
345   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
346     Mat aij;
347 
348     if (A->factor && bs>1){
349       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
350       PetscFunctionReturn(0);
351     }
352     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
353     ierr = MatView(aij,viewer);CHKERRQ(ierr);
354     ierr = MatDestroy(aij);CHKERRQ(ierr);
355   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
356     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
357     for (i=0; i<a->mbs; i++) {
358       for (j=0; j<bs; j++) {
359         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
360         for (k=a->i[i]; k<a->i[i+1]; k++) {
361           for (l=0; l<bs; l++) {
362 #if defined(PETSC_USE_COMPLEX)
363             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
364               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
365                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
366             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
367               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
368                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
369             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
370               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
371             }
372 #else
373             if (a->a[bs2*k + l*bs + j] != 0.0) {
374               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
375             }
376 #endif
377           }
378         }
379         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
380       }
381     }
382     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
383   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
384      PetscFunctionReturn(0);
385   } else {
386     if (A->factor && bs>1){
387       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
388     }
389     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
390     for (i=0; i<a->mbs; i++) {
391       for (j=0; j<bs; j++) {
392         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
393         for (k=a->i[i]; k<a->i[i+1]; k++) {
394           for (l=0; l<bs; l++) {
395 #if defined(PETSC_USE_COMPLEX)
396             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
397               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
398                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
399             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
401                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else {
403               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
404             }
405 #else
406             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
407 #endif
408           }
409         }
410         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
411       }
412     }
413     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
414   }
415   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
421 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
422 {
423   Mat            A = (Mat) Aa;
424   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
425   PetscErrorCode ierr;
426   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
427   PetscMPIInt    rank;
428   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
429   MatScalar      *aa;
430   MPI_Comm       comm;
431   PetscViewer    viewer;
432 
433   PetscFunctionBegin;
434   /*
435     This is nasty. If this is called from an originally parallel matrix
436     then all processes call this,but only the first has the matrix so the
437     rest should return immediately.
438   */
439   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
440   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
441   if (rank) PetscFunctionReturn(0);
442 
443   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
444 
445   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
446   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
447 
448   /* loop over matrix elements drawing boxes */
449   color = PETSC_DRAW_BLUE;
450   for (i=0,row=0; i<mbs; i++,row+=bs) {
451     for (j=a->i[i]; j<a->i[i+1]; j++) {
452       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
453       x_l = a->j[j]*bs; x_r = x_l + 1.0;
454       aa = a->a + j*bs2;
455       for (k=0; k<bs; k++) {
456         for (l=0; l<bs; l++) {
457           if (PetscRealPart(*aa++) >=  0.) continue;
458           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
459         }
460       }
461     }
462   }
463   color = PETSC_DRAW_CYAN;
464   for (i=0,row=0; i<mbs; i++,row+=bs) {
465     for (j=a->i[i]; j<a->i[i+1]; j++) {
466       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
467       x_l = a->j[j]*bs; x_r = x_l + 1.0;
468       aa = a->a + j*bs2;
469       for (k=0; k<bs; k++) {
470         for (l=0; l<bs; l++) {
471           if (PetscRealPart(*aa++) != 0.) continue;
472           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
473         }
474       }
475     }
476   }
477 
478   color = PETSC_DRAW_RED;
479   for (i=0,row=0; i<mbs; i++,row+=bs) {
480     for (j=a->i[i]; j<a->i[i+1]; j++) {
481       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
482       x_l = a->j[j]*bs; x_r = x_l + 1.0;
483       aa = a->a + j*bs2;
484       for (k=0; k<bs; k++) {
485         for (l=0; l<bs; l++) {
486           if (PetscRealPart(*aa++) <= 0.) continue;
487           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
497 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
498 {
499   PetscErrorCode ierr;
500   PetscReal      xl,yl,xr,yr,w,h;
501   PetscDraw      draw;
502   PetscTruth     isnull;
503 
504   PetscFunctionBegin;
505   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
506   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
507 
508   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
509   xr  = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
510   xr += w;    yr += h;  xl = -w;     yl = -h;
511   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
512   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
513   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatView_SeqSBAIJ"
519 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
520 {
521   PetscErrorCode ierr;
522   PetscTruth     iascii,isdraw;
523 
524   PetscFunctionBegin;
525   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
527   if (iascii){
528     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
529   } else if (isdraw) {
530     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
531   } else {
532     Mat B;
533     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
534     ierr = MatView(B,viewer);CHKERRQ(ierr);
535     ierr = MatDestroy(B);CHKERRQ(ierr);
536   }
537   PetscFunctionReturn(0);
538 }
539 
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
543 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
544 {
545   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
546   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
547   PetscInt     *ai = a->i,*ailen = a->ilen;
548   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
549   MatScalar    *ap,*aa = a->a;
550 
551   PetscFunctionBegin;
552   for (k=0; k<m; k++) { /* loop over rows */
553     row  = im[k]; brow = row/bs;
554     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
555     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
556     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
557     nrow = ailen[brow];
558     for (l=0; l<n; l++) { /* loop over columns */
559       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
560       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
561       col  = in[l] ;
562       bcol = col/bs;
563       cidx = col%bs;
564       ridx = row%bs;
565       high = nrow;
566       low  = 0; /* assume unsorted */
567       while (high-low > 5) {
568         t = (low+high)/2;
569         if (rp[t] > bcol) high = t;
570         else             low  = t;
571       }
572       for (i=low; i<high; i++) {
573         if (rp[i] > bcol) break;
574         if (rp[i] == bcol) {
575           *v++ = ap[bs2*i+bs*cidx+ridx];
576           goto finished;
577         }
578       }
579       *v++ = 0.0;
580       finished:;
581     }
582   }
583   PetscFunctionReturn(0);
584 }
585 
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
589 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
590 {
591   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
592   PetscErrorCode    ierr;
593   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
594   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
595   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
596   PetscTruth        roworiented=a->roworiented;
597   const PetscScalar *value = v;
598   MatScalar         *ap,*aa = a->a,*bap;
599 
600   PetscFunctionBegin;
601   if (roworiented) {
602     stepval = (n-1)*bs;
603   } else {
604     stepval = (m-1)*bs;
605   }
606   for (k=0; k<m; k++) { /* loop over added rows */
607     row  = im[k];
608     if (row < 0) continue;
609 #if defined(PETSC_USE_DEBUG)
610     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
611 #endif
612     rp   = aj + ai[row];
613     ap   = aa + bs2*ai[row];
614     rmax = imax[row];
615     nrow = ailen[row];
616     low  = 0;
617     high = nrow;
618     for (l=0; l<n; l++) { /* loop over added columns */
619       if (in[l] < 0) continue;
620       col = in[l];
621 #if defined(PETSC_USE_DEBUG)
622       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
623 #endif
624       if (col < row) continue; /* ignore lower triangular block */
625       if (roworiented) {
626         value = v + k*(stepval+bs)*bs + l*bs;
627       } else {
628         value = v + l*(stepval+bs)*bs + k*bs;
629       }
630       if (col <= lastcol) low = 0; else high = nrow;
631       lastcol = col;
632       while (high-low > 7) {
633         t = (low+high)/2;
634         if (rp[t] > col) high = t;
635         else             low  = t;
636       }
637       for (i=low; i<high; i++) {
638         if (rp[i] > col) break;
639         if (rp[i] == col) {
640           bap  = ap +  bs2*i;
641           if (roworiented) {
642             if (is == ADD_VALUES) {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] += *value++;
646                 }
647               }
648             } else {
649               for (ii=0; ii<bs; ii++,value+=stepval) {
650                 for (jj=ii; jj<bs2; jj+=bs) {
651                   bap[jj] = *value++;
652                 }
653                }
654             }
655           } else {
656             if (is == ADD_VALUES) {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++ += *value++;
660                 }
661               }
662             } else {
663               for (ii=0; ii<bs; ii++,value+=stepval) {
664                 for (jj=0; jj<bs; jj++) {
665                   *bap++  = *value++;
666                 }
667               }
668             }
669           }
670           goto noinsert2;
671         }
672       }
673       if (nonew == 1) goto noinsert2;
674       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
675       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
676       N = nrow++ - 1; high++;
677       /* shift up all the later entries in this row */
678       for (ii=N; ii>=i; ii--) {
679         rp[ii+1] = rp[ii];
680         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
681       }
682       if (N >= i) {
683         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
684       }
685       rp[i] = col;
686       bap   = ap +  bs2*i;
687       if (roworiented) {
688         for (ii=0; ii<bs; ii++,value+=stepval) {
689           for (jj=ii; jj<bs2; jj+=bs) {
690             bap[jj] = *value++;
691           }
692         }
693       } else {
694         for (ii=0; ii<bs; ii++,value+=stepval) {
695           for (jj=0; jj<bs; jj++) {
696             *bap++  = *value++;
697           }
698         }
699        }
700     noinsert2:;
701       low = i;
702     }
703     ailen[row] = nrow;
704   }
705    PetscFunctionReturn(0);
706 }
707 
708 /*
709     This is not yet used
710 */
711 #undef __FUNCT__
712 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ_Inode"
713 PetscErrorCode MatAssemblyEnd_SeqSBAIJ_Inode(Mat A)
714 {
715   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
716   PetscErrorCode  ierr;
717   const PetscInt  *ai = a->i, *aj = a->j,*cols;
718   PetscInt        i = 0,j,blk_size,m = A->rmap->n,node_count = 0,nzx,nzy,*ns,row,nz,cnt,cnt2,*counts;
719   PetscTruth      flag;
720 
721   PetscFunctionBegin;
722   ierr = PetscMalloc(m*sizeof(PetscInt),&ns);CHKERRQ(ierr);
723   while (i < m){
724     nzx = ai[i+1] - ai[i];       /* Number of nonzeros */
725     /* Limits the number of elements in a node to 'a->inode.limit' */
726     for (j=i+1,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) {
727       nzy  = ai[j+1] - ai[j];
728       if (nzy != (nzx - j + i)) break;
729       ierr = PetscMemcmp(aj + ai[i] + j - i,aj + ai[j],nzy*sizeof(PetscInt),&flag);CHKERRQ(ierr);
730       if (!flag) break;
731     }
732     ns[node_count++] = blk_size;
733     i = j;
734   }
735   if (!a->inode.size && m && node_count > .9*m) {
736     ierr = PetscFree(ns);CHKERRQ(ierr);
737     ierr = PetscInfo2(A,"Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);CHKERRQ(ierr);
738   } else {
739     a->inode.node_count = node_count;
740     ierr = PetscMalloc(node_count*sizeof(PetscInt),&a->inode.size);CHKERRQ(ierr);
741     ierr = PetscMemcpy(a->inode.size,ns,node_count*sizeof(PetscInt));
742     ierr = PetscFree(ns);CHKERRQ(ierr);
743     ierr = PetscInfo3(A,"Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);CHKERRQ(ierr);
744 
745     /* count collections of adjacent columns in each inode */
746     row = 0;
747     cnt = 0;
748     for (i=0; i<node_count; i++) {
749       cols = aj + ai[row] + a->inode.size[i];
750       nz   = ai[row+1] - ai[row] - a->inode.size[i];
751       for (j=1; j<nz; j++) {
752         if (cols[j] != cols[j-1]+1) {
753           cnt++;
754         }
755       }
756       cnt++;
757       row += a->inode.size[i];
758     }
759     ierr = PetscMalloc(2*cnt*sizeof(PetscInt),&counts);CHKERRQ(ierr);
760     cnt = 0;
761     row = 0;
762     for (i=0; i<node_count; i++) {
763       cols          = aj + ai[row] + a->inode.size[i];
764 	  CHKMEMQ;
765       counts[2*cnt] = cols[0];
766 	  CHKMEMQ;
767       nz            = ai[row+1] - ai[row] - a->inode.size[i];
768       cnt2          = 1;
769       for (j=1; j<nz; j++) {
770         if (cols[j] != cols[j-1]+1) {
771 	  CHKMEMQ;
772           counts[2*(cnt++)+1] = cnt2;
773           counts[2*cnt]       = cols[j];
774 	  CHKMEMQ;
775           cnt2                = 1;
776         } else cnt2++;
777       }
778 	  CHKMEMQ;
779       counts[2*(cnt++)+1] = cnt2;
780 	  CHKMEMQ;
781       row += a->inode.size[i];
782     }
783     ierr = PetscIntView(2*cnt,counts,0);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
790 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
791 {
792   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
793   PetscErrorCode ierr;
794   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
795   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
796   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
797   MatScalar      *aa = a->a,*ap;
798 
799   PetscFunctionBegin;
800   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
801 
802   if (m) rmax = ailen[0];
803   for (i=1; i<mbs; i++) {
804     /* move each row back by the amount of empty slots (fshift) before it*/
805     fshift += imax[i-1] - ailen[i-1];
806      rmax   = PetscMax(rmax,ailen[i]);
807      if (fshift) {
808        ip = aj + ai[i]; ap = aa + bs2*ai[i];
809        N = ailen[i];
810        for (j=0; j<N; j++) {
811          ip[j-fshift] = ip[j];
812          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
813        }
814      }
815      ai[i] = ai[i-1] + ailen[i-1];
816   }
817   if (mbs) {
818     fshift += imax[mbs-1] - ailen[mbs-1];
819      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
820   }
821   /* reset ilen and imax for each row */
822   for (i=0; i<mbs; i++) {
823     ailen[i] = imax[i] = ai[i+1] - ai[i];
824   }
825   a->nz = ai[mbs];
826 
827   /* diagonals may have moved, reset it */
828   if (a->diag) {
829     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
830   }
831   if (fshift && a->nounused == -1) {
832     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
833   }
834   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
835   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
836   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
837   a->reallocs          = 0;
838   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
839   a->idiagvalid = PETSC_FALSE;
840 
841   if (A->cmap->n < 65536 && A->cmap->bs == 1) {
842     if (!a->jshort) {
843       ierr = PetscMalloc(a->i[A->rmap->n]*sizeof(unsigned short),&a->jshort);CHKERRQ(ierr);
844       for (i=0; i<a->i[A->rmap->n]; i++) a->jshort[i] = a->j[i];
845       A->ops->mult  = MatMult_SeqSBAIJ_1_ushort;
846       A->ops->relax = MatRelax_SeqSBAIJ_ushort;
847       a->free_jshort = PETSC_TRUE;
848     }
849   }
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854    This function returns an array of flags which indicate the locations of contiguous
855    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
856    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
857    Assume: sizes should be long enough to hold all the values.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
861 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
862 {
863   PetscInt   i,j,k,row;
864   PetscTruth flg;
865 
866   PetscFunctionBegin;
867    for (i=0,j=0; i<n; j++) {
868      row = idx[i];
869      if (row%bs!=0) { /* Not the begining of a block */
870        sizes[j] = 1;
871        i++;
872      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
873        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
874        i++;
875      } else { /* Begining of the block, so check if the complete block exists */
876        flg = PETSC_TRUE;
877        for (k=1; k<bs; k++) {
878          if (row+k != idx[i+k]) { /* break in the block */
879            flg = PETSC_FALSE;
880            break;
881          }
882        }
883        if (flg) { /* No break in the bs */
884          sizes[j] = bs;
885          i+= bs;
886        } else {
887          sizes[j] = 1;
888          i++;
889        }
890      }
891    }
892    *bs_max = j;
893    PetscFunctionReturn(0);
894 }
895 
896 
897 /* Only add/insert a(i,j) with i<=j (blocks).
898    Any a(i,j) with i>j input by user is ingored.
899 */
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
903 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
904 {
905   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
906   PetscErrorCode ierr;
907   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
908   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
909   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
910   PetscInt       ridx,cidx,bs2=a->bs2;
911   MatScalar      *ap,value,*aa=a->a,*bap;
912 
913   PetscFunctionBegin;
914   for (k=0; k<m; k++) { /* loop over added rows */
915     row  = im[k];       /* row number */
916     brow = row/bs;      /* block row number */
917     if (row < 0) continue;
918 #if defined(PETSC_USE_DEBUG)
919     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
920 #endif
921     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
922     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
923     rmax = imax[brow];  /* maximum space allocated for this row */
924     nrow = ailen[brow]; /* actual length of this row */
925     low  = 0;
926 
927     for (l=0; l<n; l++) { /* loop over added columns */
928       if (in[l] < 0) continue;
929 #if defined(PETSC_USE_DEBUG)
930       if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1);
931 #endif
932       col = in[l];
933       bcol = col/bs;              /* block col number */
934 
935       if (brow > bcol) {
936         if (a->ignore_ltriangular){
937           continue; /* ignore lower triangular values */
938         } else {
939           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
940         }
941       }
942 
943       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
944       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
945         /* element value a(k,l) */
946         if (roworiented) {
947           value = v[l + k*n];
948         } else {
949           value = v[k + l*m];
950         }
951 
952         /* move pointer bap to a(k,l) quickly and add/insert value */
953         if (col <= lastcol) low = 0; high = nrow;
954         lastcol = col;
955         while (high-low > 7) {
956           t = (low+high)/2;
957           if (rp[t] > bcol) high = t;
958           else              low  = t;
959         }
960         for (i=low; i<high; i++) {
961           if (rp[i] > bcol) break;
962           if (rp[i] == bcol) {
963             bap  = ap +  bs2*i + bs*cidx + ridx;
964             if (is == ADD_VALUES) *bap += value;
965             else                  *bap  = value;
966             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
967             if (brow == bcol && ridx < cidx){
968               bap  = ap +  bs2*i + bs*ridx + cidx;
969               if (is == ADD_VALUES) *bap += value;
970               else                  *bap  = value;
971             }
972             goto noinsert1;
973           }
974         }
975 
976         if (nonew == 1) goto noinsert1;
977         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
978         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
979 
980         N = nrow++ - 1; high++;
981         /* shift up all the later entries in this row */
982         for (ii=N; ii>=i; ii--) {
983           rp[ii+1] = rp[ii];
984           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
985         }
986         if (N>=i) {
987           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
988         }
989         rp[i]                      = bcol;
990         ap[bs2*i + bs*cidx + ridx] = value;
991       noinsert1:;
992         low = i;
993       }
994     }   /* end of loop over added columns */
995     ailen[brow] = nrow;
996   }   /* end of loop over added rows */
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1002 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info)
1003 {
1004   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1005   Mat            outA;
1006   PetscErrorCode ierr;
1007   PetscTruth     row_identity;
1008 
1009   PetscFunctionBegin;
1010   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
1011   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1012   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
1013   if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
1014 
1015   outA        = inA;
1016   inA->factor = MAT_FACTOR_ICC;
1017 
1018   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1019   ierr = MatSeqSBAIJSetNumericFactorization(inA,row_identity);CHKERRQ(ierr);
1020 
1021   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1022   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1023   a->row = row;
1024   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1025   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1026   a->col = row;
1027 
1028   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
1029   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
1030   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1031 
1032   if (!a->solve_work) {
1033     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1034     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1035   }
1036 
1037   ierr = MatCholeskyFactorNumeric(outA,inA,info);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1044 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1045 {
1046   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1047   PetscInt     i,nz,n;
1048 
1049   PetscFunctionBegin;
1050   nz = baij->maxnz;
1051   n  = mat->cmap->n;
1052   for (i=0; i<nz; i++) {
1053     baij->j[i] = indices[i];
1054   }
1055    baij->nz = nz;
1056    for (i=0; i<n; i++) {
1057      baij->ilen[i] = baij->imax[i];
1058    }
1059    PetscFunctionReturn(0);
1060 }
1061 EXTERN_C_END
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1065 /*@
1066   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1067   in the matrix.
1068 
1069   Input Parameters:
1070   +  mat     - the SeqSBAIJ matrix
1071   -  indices - the column indices
1072 
1073   Level: advanced
1074 
1075   Notes:
1076   This can be called if you have precomputed the nonzero structure of the
1077   matrix and want to provide it to the matrix object to improve the performance
1078   of the MatSetValues() operation.
1079 
1080   You MUST have set the correct numbers of nonzeros per row in the call to
1081   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1082 
1083   MUST be called before any calls to MatSetValues()
1084 
1085   .seealso: MatCreateSeqSBAIJ
1086 @*/
1087 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1088 {
1089   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1090 
1091   PetscFunctionBegin;
1092   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1093   PetscValidPointer(indices,2);
1094   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1095   if (f) {
1096     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1097   } else {
1098     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1105 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1106 {
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   /* If the two matrices have the same copy implementation, use fast copy. */
1111   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1112     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1113     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1114 
1115     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
1116       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1117     }
1118     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
1119   } else {
1120     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1121     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1122     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1123   }
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1129 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1130 {
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1140 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1141 {
1142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1143   PetscFunctionBegin;
1144   *array = a->a;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1150 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1151 {
1152   PetscFunctionBegin;
1153   PetscFunctionReturn(0);
1154  }
1155 
1156 #include "petscblaslapack.h"
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1159 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1160 {
1161   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1162   PetscErrorCode ierr;
1163   PetscInt       i,bs=Y->rmap->bs,bs2,j;
1164   PetscBLASInt   one = 1,bnz = PetscBLASIntCast(x->nz);
1165 
1166   PetscFunctionBegin;
1167   if (str == SAME_NONZERO_PATTERN) {
1168     PetscScalar alpha = a;
1169     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1170   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1171     if (y->xtoy && y->XtoY != X) {
1172       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1173       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1174     }
1175     if (!y->xtoy) { /* get xtoy */
1176       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1177       y->XtoY = X;
1178     }
1179     bs2 = bs*bs;
1180     for (i=0; i<x->nz; i++) {
1181       j = 0;
1182       while (j < bs2){
1183         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1184         j++;
1185       }
1186     }
1187     ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1188   } else {
1189     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1190     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1191     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1198 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1199 {
1200   PetscFunctionBegin;
1201   *flg = PETSC_TRUE;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1207 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1208 {
1209    PetscFunctionBegin;
1210    *flg = PETSC_TRUE;
1211    PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1216 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1217  {
1218    PetscFunctionBegin;
1219    *flg = PETSC_FALSE;
1220    PetscFunctionReturn(0);
1221  }
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1225 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1226 {
1227   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1228   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1229   MatScalar      *aa = a->a;
1230 
1231   PetscFunctionBegin;
1232   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1238 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1239 {
1240   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1241   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1242   MatScalar      *aa = a->a;
1243 
1244   PetscFunctionBegin;
1245   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 /* -------------------------------------------------------------------*/
1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1251        MatGetRow_SeqSBAIJ,
1252        MatRestoreRow_SeqSBAIJ,
1253        MatMult_SeqSBAIJ_N,
1254 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1255        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1256        MatMultAdd_SeqSBAIJ_N,
1257        0,
1258        0,
1259        0,
1260 /*10*/ 0,
1261        0,
1262        MatCholeskyFactor_SeqSBAIJ,
1263        MatRelax_SeqSBAIJ,
1264        MatTranspose_SeqSBAIJ,
1265 /*15*/ MatGetInfo_SeqSBAIJ,
1266        MatEqual_SeqSBAIJ,
1267        MatGetDiagonal_SeqSBAIJ,
1268        MatDiagonalScale_SeqSBAIJ,
1269        MatNorm_SeqSBAIJ,
1270 /*20*/ 0,
1271        MatAssemblyEnd_SeqSBAIJ,
1272        MatSetOption_SeqSBAIJ,
1273        MatZeroEntries_SeqSBAIJ,
1274 /*24*/ 0,
1275        0,
1276        0,
1277        0,
1278        0,
1279 /*29*/ MatSetUpPreallocation_SeqSBAIJ,
1280        0,
1281        0,
1282        MatGetArray_SeqSBAIJ,
1283        MatRestoreArray_SeqSBAIJ,
1284 /*34*/ MatDuplicate_SeqSBAIJ,
1285        0,
1286        0,
1287        0,
1288        MatICCFactor_SeqSBAIJ,
1289 /*39*/ MatAXPY_SeqSBAIJ,
1290        MatGetSubMatrices_SeqSBAIJ,
1291        MatIncreaseOverlap_SeqSBAIJ,
1292        MatGetValues_SeqSBAIJ,
1293        MatCopy_SeqSBAIJ,
1294 /*44*/ 0,
1295        MatScale_SeqSBAIJ,
1296        0,
1297        0,
1298        0,
1299 /*49*/ 0,
1300        MatGetRowIJ_SeqSBAIJ,
1301        MatRestoreRowIJ_SeqSBAIJ,
1302        0,
1303        0,
1304 /*54*/ 0,
1305        0,
1306        0,
1307        0,
1308        MatSetValuesBlocked_SeqSBAIJ,
1309 /*59*/ MatGetSubMatrix_SeqSBAIJ,
1310        0,
1311        0,
1312        0,
1313        0,
1314 /*64*/ 0,
1315        0,
1316        0,
1317        0,
1318        0,
1319 /*69*/ MatGetRowMaxAbs_SeqSBAIJ,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*74*/ 0,
1325        0,
1326        0,
1327        0,
1328        0,
1329 /*79*/ 0,
1330        0,
1331        0,
1332 #if !defined(PETSC_USE_COMPLEX)
1333        MatGetInertia_SeqSBAIJ,
1334 #else
1335        0,
1336 #endif
1337        MatLoad_SeqSBAIJ,
1338 /*84*/ MatIsSymmetric_SeqSBAIJ,
1339        MatIsHermitian_SeqSBAIJ,
1340        MatIsStructurallySymmetric_SeqSBAIJ,
1341        0,
1342        0,
1343 /*89*/ 0,
1344        0,
1345        0,
1346        0,
1347        0,
1348 /*94*/ 0,
1349        0,
1350        0,
1351        0,
1352        0,
1353 /*99*/ 0,
1354        0,
1355        0,
1356        0,
1357        0,
1358 /*104*/0,
1359        MatRealPart_SeqSBAIJ,
1360        MatImaginaryPart_SeqSBAIJ,
1361        MatGetRowUpperTriangular_SeqSBAIJ,
1362        MatRestoreRowUpperTriangular_SeqSBAIJ,
1363 /*109*/0,
1364        0,
1365        0,
1366        0,
1367        MatMissingDiagonal_SeqSBAIJ
1368 };
1369 
1370 EXTERN_C_BEGIN
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1373 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1374 {
1375   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1376   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   if (aij->nonew != 1) {
1381     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1382   }
1383 
1384   /* allocate space for values if not already there */
1385   if (!aij->saved_values) {
1386     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1387   }
1388 
1389   /* copy values over */
1390   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 EXTERN_C_END
1394 
1395 EXTERN_C_BEGIN
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1398 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1399 {
1400   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1401   PetscErrorCode ierr;
1402   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1403 
1404   PetscFunctionBegin;
1405   if (aij->nonew != 1) {
1406     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1407   }
1408   if (!aij->saved_values) {
1409     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1421 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1422 {
1423   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1424   PetscErrorCode ierr;
1425   PetscInt       i,mbs,bs2, newbs = PetscAbs(bs);
1426   PetscTruth     skipallocation = PETSC_FALSE,flg = PETSC_FALSE;
1427 
1428   PetscFunctionBegin;
1429   B->preallocated = PETSC_TRUE;
1430   if (bs < 0) {
1431     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1432       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1433     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1434     bs   = PetscAbs(bs);
1435   }
1436   if (nnz && newbs != bs) {
1437     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
1438   }
1439   bs = newbs;
1440 
1441   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1442   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1443   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1444   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1445 
1446   mbs  = B->rmap->N/bs;
1447   bs2  = bs*bs;
1448 
1449   if (mbs*bs != B->rmap->N) {
1450     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1451   }
1452 
1453   if (nz == MAT_SKIP_ALLOCATION) {
1454     skipallocation = PETSC_TRUE;
1455     nz             = 0;
1456   }
1457 
1458   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1459   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1460   if (nnz) {
1461     for (i=0; i<mbs; i++) {
1462       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1463       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1464     }
1465   }
1466 
1467   B->ops->mult             = MatMult_SeqSBAIJ_N;
1468   B->ops->multadd          = MatMultAdd_SeqSBAIJ_N;
1469   B->ops->multtranspose    = MatMult_SeqSBAIJ_N;
1470   B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N;
1471   ierr  = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1472   if (!flg) {
1473     switch (bs) {
1474     case 1:
1475       B->ops->mult             = MatMult_SeqSBAIJ_1;
1476       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1477       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1478       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1479       break;
1480     case 2:
1481       B->ops->mult             = MatMult_SeqSBAIJ_2;
1482       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1483       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1484       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1485       break;
1486     case 3:
1487       B->ops->mult             = MatMult_SeqSBAIJ_3;
1488       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1489       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1490       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1491       break;
1492     case 4:
1493       B->ops->mult             = MatMult_SeqSBAIJ_4;
1494       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1495       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1496       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1497       break;
1498     case 5:
1499       B->ops->mult             = MatMult_SeqSBAIJ_5;
1500       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1501       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1502       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1503       break;
1504     case 6:
1505       B->ops->mult             = MatMult_SeqSBAIJ_6;
1506       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1507       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1508       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1509       break;
1510     case 7:
1511       B->ops->mult             = MatMult_SeqSBAIJ_7;
1512       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1513       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1514       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1515       break;
1516     }
1517   }
1518 
1519   b->mbs = mbs;
1520   b->nbs = mbs;
1521   if (!skipallocation) {
1522     if (!b->imax) {
1523       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1524       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
1525     }
1526     if (!nnz) {
1527       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1528       else if (nz <= 0)        nz = 1;
1529       for (i=0; i<mbs; i++) {
1530         b->imax[i] = nz;
1531       }
1532       nz = nz*mbs; /* total nz */
1533     } else {
1534       nz = 0;
1535       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1536     }
1537     /* b->ilen will count nonzeros in each block row so far. */
1538     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1539     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1540 
1541     /* allocate the matrix space */
1542     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
1543     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
1544     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1545     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1546     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1547     b->singlemalloc = PETSC_TRUE;
1548 
1549     /* pointer to beginning of each row */
1550     b->i[0] = 0;
1551     for (i=1; i<mbs+1; i++) {
1552       b->i[i] = b->i[i-1] + b->imax[i-1];
1553     }
1554     b->free_a     = PETSC_TRUE;
1555     b->free_ij    = PETSC_TRUE;
1556   } else {
1557     b->free_a     = PETSC_FALSE;
1558     b->free_ij    = PETSC_FALSE;
1559   }
1560 
1561   B->rmap->bs               = bs;
1562   b->bs2              = bs2;
1563   b->nz             = 0;
1564   b->maxnz          = nz*bs2;
1565 
1566   b->inew             = 0;
1567   b->jnew             = 0;
1568   b->anew             = 0;
1569   b->a2anew           = 0;
1570   b->permute          = PETSC_FALSE;
1571   PetscFunctionReturn(0);
1572 }
1573 EXTERN_C_END
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
1577 /*
1578    This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization
1579 */
1580 PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural)
1581 {
1582   PetscErrorCode ierr;
1583   PetscTruth     flg = PETSC_FALSE;
1584   PetscInt       bs = B->rmap->bs;
1585 
1586   PetscFunctionBegin;
1587   ierr    = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr);
1588   if (flg) bs = 8;
1589 
1590   if (!natural) {
1591     switch (bs) {
1592     case 1:
1593       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1594       break;
1595     case 2:
1596       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1597       break;
1598     case 3:
1599       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1600       break;
1601     case 4:
1602       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1603       break;
1604     case 5:
1605       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1606       break;
1607     case 6:
1608       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1609       break;
1610     case 7:
1611       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1612       break;
1613     default:
1614       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N;
1615       break;
1616     }
1617   } else {
1618     switch (bs) {
1619     case 1:
1620       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1621       break;
1622     case 2:
1623       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1624       break;
1625     case 3:
1626       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1627       break;
1628     case 4:
1629       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1630       break;
1631     case 5:
1632       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1633       break;
1634     case 6:
1635       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1636       break;
1637     case 7:
1638       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1639       break;
1640     default:
1641       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1642       break;
1643     }
1644   }
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 EXTERN_C_BEGIN
1649 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1651 EXTERN_C_END
1652 
1653 
1654 EXTERN_C_BEGIN
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc"
1657 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
1658 {
1659   PetscInt           n = A->rmap->n;
1660   PetscErrorCode     ierr;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
1664   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
1665   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1666     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
1667     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
1668     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ;
1669     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqSBAIJ;
1670   } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
1671   (*B)->factor = ftype;
1672   PetscFunctionReturn(0);
1673 }
1674 EXTERN_C_END
1675 
1676 EXTERN_C_BEGIN
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatGetFactorAvailable_seqsbaij_petsc"
1679 PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
1680 {
1681   PetscFunctionBegin;
1682   if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1683     *flg = PETSC_TRUE;
1684   } else {
1685     *flg = PETSC_FALSE;
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 EXTERN_C_END
1690 
1691 EXTERN_C_BEGIN
1692 #if defined(PETSC_HAVE_MUMPS)
1693 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*);
1694 #endif
1695 #if defined(PETSC_HAVE_SPOOLES)
1696 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*);
1697 #endif
1698 #if defined(PETSC_HAVE_PASTIX)
1699 extern PetscErrorCode MatGetFactor_seqsbaij_pastix(Mat,MatFactorType,Mat*);
1700 #endif
1701 EXTERN_C_END
1702 
1703 /*MC
1704   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1705   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1706 
1707   Options Database Keys:
1708   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1709 
1710   Level: beginner
1711 
1712   .seealso: MatCreateSeqSBAIJ
1713 M*/
1714 
1715 EXTERN_C_BEGIN
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1718 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1719 {
1720   Mat_SeqSBAIJ   *b;
1721   PetscErrorCode ierr;
1722   PetscMPIInt    size;
1723   PetscTruth     no_unroll = PETSC_FALSE,no_inode = PETSC_FALSE;
1724 
1725   PetscFunctionBegin;
1726   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
1727   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1728 
1729   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1730   B->data = (void*)b;
1731   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1732   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1733   B->ops->view        = MatView_SeqSBAIJ;
1734   B->mapping          = 0;
1735   b->row              = 0;
1736   b->icol             = 0;
1737   b->reallocs         = 0;
1738   b->saved_values     = 0;
1739   b->inode.limit      = 5;
1740   b->inode.max_limit  = 5;
1741 
1742   b->roworiented      = PETSC_TRUE;
1743   b->nonew            = 0;
1744   b->diag             = 0;
1745   b->solve_work       = 0;
1746   b->mult_work        = 0;
1747   B->spptr            = 0;
1748   b->keepnonzeropattern   = PETSC_FALSE;
1749   b->xtoy             = 0;
1750   b->XtoY             = 0;
1751 
1752   b->inew             = 0;
1753   b->jnew             = 0;
1754   b->anew             = 0;
1755   b->a2anew           = 0;
1756   b->permute          = PETSC_FALSE;
1757 
1758   b->ignore_ltriangular = PETSC_FALSE;
1759   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&b->ignore_ltriangular,PETSC_NULL);CHKERRQ(ierr);
1760 
1761   b->getrow_utriangular = PETSC_FALSE;
1762   ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&b->getrow_utriangular,PETSC_NULL);CHKERRQ(ierr);
1763 
1764 #if defined(PETSC_HAVE_PASTIX)
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C",
1766 					   "MatGetFactor_seqsbaij_pastix",
1767 					   MatGetFactor_seqsbaij_pastix);CHKERRQ(ierr);
1768 #endif
1769 #if defined(PETSC_HAVE_SPOOLES)
1770   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C",
1771                                      "MatGetFactor_seqsbaij_spooles",
1772                                      MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr);
1773 #endif
1774 #if defined(PETSC_HAVE_MUMPS)
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C",
1776                                      "MatGetFactor_seqsbaij_mumps",
1777                                      MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr);
1778 #endif
1779   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C",
1780                                      "MatGetFactorAvailable_seqsbaij_petsc",
1781                                      MatGetFactorAvailable_seqsbaij_petsc);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C",
1783                                      "MatGetFactor_seqsbaij_petsc",
1784                                      MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1786                                      "MatStoreValues_SeqSBAIJ",
1787                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1789                                      "MatRetrieveValues_SeqSBAIJ",
1790                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1792                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1793                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1795                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1796                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1798                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1799                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1801                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1802                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1803 
1804   B->symmetric                  = PETSC_TRUE;
1805   B->structurally_symmetric     = PETSC_TRUE;
1806   B->symmetric_set              = PETSC_TRUE;
1807   B->structurally_symmetric_set = PETSC_TRUE;
1808   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1809 
1810   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for SEQSBAIJ matrix","Mat");CHKERRQ(ierr);
1811     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for inodes (slower)",PETSC_NULL,no_unroll,&no_unroll,PETSC_NULL);CHKERRQ(ierr);
1812     if (no_unroll) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_unroll\n");CHKERRQ(ierr);}
1813     ierr = PetscOptionsTruth("-mat_no_inode","Do not optimize for inodes (slower)",PETSC_NULL,no_inode,&no_inode,PETSC_NULL);CHKERRQ(ierr);
1814     if (no_inode) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_inode\n");CHKERRQ(ierr);}
1815     ierr = PetscOptionsInt("-mat_inode_limit","Do not use inodes larger then this value",PETSC_NULL,b->inode.limit,&b->inode.limit,PETSC_NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1817   b->inode.use = (PetscTruth)(!(no_unroll || no_inode));
1818   if (b->inode.limit > b->inode.max_limit) b->inode.limit = b->inode.max_limit;
1819 
1820   PetscFunctionReturn(0);
1821 }
1822 EXTERN_C_END
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1826 /*@C
1827    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1828    compressed row) format.  For good matrix assembly performance the
1829    user should preallocate the matrix storage by setting the parameter nz
1830    (or the array nnz).  By setting these parameters accurately, performance
1831    during matrix assembly can be increased by more than a factor of 50.
1832 
1833    Collective on Mat
1834 
1835    Input Parameters:
1836 +  A - the symmetric matrix
1837 .  bs - size of block
1838 .  nz - number of block nonzeros per block row (same for all rows)
1839 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1840          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1841 
1842    Options Database Keys:
1843 .   -mat_no_unroll - uses code that does not unroll the loops in the
1844                      block calculations (much slower)
1845 .    -mat_block_size - size of the blocks to use (only works if a negative bs is passed in
1846 
1847    Level: intermediate
1848 
1849    Notes:
1850    Specify the preallocated storage with either nz or nnz (not both).
1851    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1852    allocation.  For additional details, see the users manual chapter on
1853    matrices.
1854 
1855    You can call MatGetInfo() to get information on how effective the preallocation was;
1856    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1857    You can also run with the option -info and look for messages with the string
1858    malloc in them to see if additional memory allocation was needed.
1859 
1860    If the nnz parameter is given then the nz parameter is ignored
1861 
1862 
1863 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1864 @*/
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1866 {
1867   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1868 
1869   PetscFunctionBegin;
1870   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1871   if (f) {
1872     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatCreateSeqSBAIJ"
1879 /*@C
1880    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1881    compressed row) format.  For good matrix assembly performance the
1882    user should preallocate the matrix storage by setting the parameter nz
1883    (or the array nnz).  By setting these parameters accurately, performance
1884    during matrix assembly can be increased by more than a factor of 50.
1885 
1886    Collective on MPI_Comm
1887 
1888    Input Parameters:
1889 +  comm - MPI communicator, set to PETSC_COMM_SELF
1890 .  bs - size of block
1891 .  m - number of rows, or number of columns
1892 .  nz - number of block nonzeros per block row (same for all rows)
1893 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1894          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1895 
1896    Output Parameter:
1897 .  A - the symmetric matrix
1898 
1899    Options Database Keys:
1900 .   -mat_no_unroll - uses code that does not unroll the loops in the
1901                      block calculations (much slower)
1902 .    -mat_block_size - size of the blocks to use
1903 
1904    Level: intermediate
1905 
1906    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1907    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1908    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1909 
1910    Notes:
1911    The number of rows and columns must be divisible by blocksize.
1912    This matrix type does not support complex Hermitian operation.
1913 
1914    Specify the preallocated storage with either nz or nnz (not both).
1915    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1916    allocation.  For additional details, see the users manual chapter on
1917    matrices.
1918 
1919    If the nnz parameter is given then the nz parameter is ignored
1920 
1921 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1922 @*/
1923 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1929   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1930   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1931   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1937 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1938 {
1939   Mat            C;
1940   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1941   PetscErrorCode ierr;
1942   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1943 
1944   PetscFunctionBegin;
1945   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1946 
1947   *B = 0;
1948   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1949   ierr = MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
1950   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1951   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1952   c    = (Mat_SeqSBAIJ*)C->data;
1953 
1954   C->preallocated       = PETSC_TRUE;
1955   C->factor             = A->factor;
1956   c->row                = 0;
1957   c->icol               = 0;
1958   c->saved_values       = 0;
1959   c->keepnonzeropattern = a->keepnonzeropattern;
1960   C->assembled          = PETSC_TRUE;
1961 
1962   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
1963   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
1964   c->bs2  = a->bs2;
1965   c->mbs  = a->mbs;
1966   c->nbs  = a->nbs;
1967 
1968   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1969   for (i=0; i<mbs; i++) {
1970     c->imax[i] = a->imax[i];
1971     c->ilen[i] = a->ilen[i];
1972   }
1973 
1974   /* allocate the matrix space */
1975   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
1976     ierr = PetscMalloc(bs2*nz*sizeof(MatScalar),&c->a);CHKERRQ(ierr);
1977     ierr = PetscLogObjectMemory(C,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1978     c->singlemalloc = PETSC_FALSE;
1979     c->free_ij      = PETSC_FALSE;
1980     c->parent       = A;
1981     ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1982     ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1983     ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1984   } else {
1985     ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1986     ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1987     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1988     c->singlemalloc = PETSC_TRUE;
1989     c->free_ij      = PETSC_TRUE;
1990   }
1991   if (mbs > 0) {
1992     if (cpvalues != MAT_SHARE_NONZERO_PATTERN) {
1993       ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1994     }
1995     if (cpvalues == MAT_COPY_VALUES) {
1996       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1997     } else {
1998       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1999     }
2000     if (a->jshort) {
2001       if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
2002         c->jshort      = a->jshort;
2003         c->free_jshort = PETSC_FALSE;
2004       } else {
2005         ierr = PetscMalloc(nz*sizeof(unsigned short),&c->jshort);CHKERRQ(ierr);
2006         ierr = PetscMemcpy(c->jshort,a->jshort,nz*sizeof(unsigned short));CHKERRQ(ierr);
2007         c->free_jshort = PETSC_TRUE;
2008       }
2009     }
2010   }
2011 
2012   c->roworiented = a->roworiented;
2013   c->nonew       = a->nonew;
2014 
2015   if (a->diag) {
2016     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2017     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2018     for (i=0; i<mbs; i++) {
2019       c->diag[i] = a->diag[i];
2020     }
2021   } else c->diag  = 0;
2022   c->nz           = a->nz;
2023   c->maxnz        = a->maxnz;
2024   c->solve_work   = 0;
2025   c->mult_work    = 0;
2026   c->free_a       = PETSC_TRUE;
2027   *B = C;
2028   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatLoad_SeqSBAIJ"
2034 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2035 {
2036   Mat_SeqSBAIJ   *a;
2037   Mat            B;
2038   PetscErrorCode ierr;
2039   int            fd;
2040   PetscMPIInt    size;
2041   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2042   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
2043   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2044   PetscInt       *masked,nmask,tmp,bs2,ishift;
2045   PetscScalar    *aa;
2046   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2047 
2048   PetscFunctionBegin;
2049   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2050   bs2  = bs*bs;
2051 
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2054   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2055   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2056   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2057   M = header[1]; N = header[2]; nz = header[3];
2058 
2059   if (header[3] < 0) {
2060     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
2061   }
2062 
2063   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2064 
2065   /*
2066      This code adds extra rows to make sure the number of rows is
2067     divisible by the blocksize
2068   */
2069   mbs        = M/bs;
2070   extra_rows = bs - M + bs*(mbs);
2071   if (extra_rows == bs) extra_rows = 0;
2072   else                  mbs++;
2073   if (extra_rows) {
2074     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2075   }
2076 
2077   /* read in row lengths */
2078   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2079   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2080   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2081 
2082   /* read in column indices */
2083   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2084   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2085   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2086 
2087   /* loop over row lengths determining block row lengths */
2088   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
2089   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2090   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2091   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2092   masked   = mask + mbs;
2093   rowcount = 0; nzcount = 0;
2094   for (i=0; i<mbs; i++) {
2095     nmask = 0;
2096     for (j=0; j<bs; j++) {
2097       kmax = rowlengths[rowcount];
2098       for (k=0; k<kmax; k++) {
2099         tmp = jj[nzcount++]/bs;   /* block col. index */
2100         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
2101       }
2102       rowcount++;
2103     }
2104     s_browlengths[i] += nmask;
2105 
2106     /* zero out the mask elements we set */
2107     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2108   }
2109 
2110   /* create our matrix */
2111   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
2112   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2113   ierr = MatSetType(B,type);CHKERRQ(ierr);
2114   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
2115   a = (Mat_SeqSBAIJ*)B->data;
2116 
2117   /* set matrix "i" values */
2118   a->i[0] = 0;
2119   for (i=1; i<= mbs; i++) {
2120     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
2121     a->ilen[i-1] = s_browlengths[i-1];
2122   }
2123   a->nz = a->i[mbs];
2124 
2125   /* read in nonzero values */
2126   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2127   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2128   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2129 
2130   /* set "a" and "j" values into matrix */
2131   nzcount = 0; jcount = 0;
2132   for (i=0; i<mbs; i++) {
2133     nzcountb = nzcount;
2134     nmask    = 0;
2135     for (j=0; j<bs; j++) {
2136       kmax = rowlengths[i*bs+j];
2137       for (k=0; k<kmax; k++) {
2138         tmp = jj[nzcount++]/bs; /* block col. index */
2139         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
2140       }
2141     }
2142     /* sort the masked values */
2143     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2144 
2145     /* set "j" values into matrix */
2146     maskcount = 1;
2147     for (j=0; j<nmask; j++) {
2148       a->j[jcount++]  = masked[j];
2149       mask[masked[j]] = maskcount++;
2150     }
2151 
2152     /* set "a" values into matrix */
2153     ishift = bs2*a->i[i];
2154     for (j=0; j<bs; j++) {
2155       kmax = rowlengths[i*bs+j];
2156       for (k=0; k<kmax; k++) {
2157         tmp       = jj[nzcountb]/bs ; /* block col. index */
2158         if (tmp >= i){
2159           block     = mask[tmp] - 1;
2160           point     = jj[nzcountb] - bs*tmp;
2161           idx       = ishift + bs2*block + j + bs*point;
2162           a->a[idx] = aa[nzcountb];
2163         }
2164         nzcountb++;
2165       }
2166     }
2167     /* zero out the mask elements we set */
2168     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2169   }
2170   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2171 
2172   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2173   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
2174   ierr = PetscFree(aa);CHKERRQ(ierr);
2175   ierr = PetscFree(jj);CHKERRQ(ierr);
2176   ierr = PetscFree(mask);CHKERRQ(ierr);
2177 
2178   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr = MatView_Private(B);CHKERRQ(ierr);
2181   *A = B;
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2187 /*@
2188      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2189               (upper triangular entries in CSR format) provided by the user.
2190 
2191      Collective on MPI_Comm
2192 
2193    Input Parameters:
2194 +  comm - must be an MPI communicator of size 1
2195 .  bs - size of block
2196 .  m - number of rows
2197 .  n - number of columns
2198 .  i - row indices
2199 .  j - column indices
2200 -  a - matrix values
2201 
2202    Output Parameter:
2203 .  mat - the matrix
2204 
2205    Level: intermediate
2206 
2207    Notes:
2208        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2209     once the matrix is destroyed
2210 
2211        You cannot set new nonzero locations into this matrix, that will generate an error.
2212 
2213        The i and j indices are 0 based
2214 
2215 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2216 
2217 @*/
2218 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2219 {
2220   PetscErrorCode ierr;
2221   PetscInt       ii;
2222   Mat_SeqSBAIJ   *sbaij;
2223 
2224   PetscFunctionBegin;
2225   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2226   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2227 
2228   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2229   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2230   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2231   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2232   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2233   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2234 
2235   sbaij->i = i;
2236   sbaij->j = j;
2237   sbaij->a = a;
2238   sbaij->singlemalloc = PETSC_FALSE;
2239   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2240   sbaij->free_a       = PETSC_FALSE;
2241   sbaij->free_ij      = PETSC_FALSE;
2242 
2243   for (ii=0; ii<m; ii++) {
2244     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2245 #if defined(PETSC_USE_DEBUG)
2246     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2247 #endif
2248   }
2249 #if defined(PETSC_USE_DEBUG)
2250   for (ii=0; ii<sbaij->i[m]; ii++) {
2251     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2252     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2253   }
2254 #endif
2255 
2256   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2257   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 
2262 
2263 
2264 
2265