xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 4e0d8c253c5baa1c48327ddb2b78bf7a43f58497)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i;
40 
41   PetscFunctionBegin;
42   if (!a->diag) {
43     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   }
45   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60   if (!blockcompressed) {
61     /* malloc & create the natural set of indices */
62     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
63     for (i=0; i<n+1; i++) {
64       for (j=0; j<bs; j++) {
65         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
66       }
67     }
68     for (i=0; i<nz; i++) {
69       for (j=0; j<bs; j++) {
70         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
71       }
72     }
73   } else { /* blockcompressed */
74     if (oshift == 1) {
75       /* temporarily add 1 to i and j indices */
76       for (i=0; i<nz; i++) a->j[i]++;
77       for (i=0; i<n+1; i++) a->i[i]++;
78     }
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
88 {
89   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90   PetscInt     i,n = a->mbs,nz = a->i[n];
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95 
96   if (!blockcompressed) {
97     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) { /* blockcompressed */
99     for (i=0; i<nz; i++) a->j[i]--;
100     for (i=0; i<n+1; i++) a->i[i]--;
101   }
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
116 #endif
117   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
118   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
119   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
120   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
121   ierr = PetscFree(a->diag);CHKERRQ(ierr);
122   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
123   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
125   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
126   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
127   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
128   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
129 
130   ierr = PetscFree(a->inew);CHKERRQ(ierr);
131   ierr = PetscFree(a);CHKERRQ(ierr);
132 
133   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
146 {
147   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   switch (op) {
152   case MAT_ROW_ORIENTED:
153     a->roworiented = flg;
154     break;
155   case MAT_COLUMNS_SORTED:
156     a->sorted = flg;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = flg;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew = (flg ? 1 : 0);
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew = (flg ? -1 : 0);
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew = (flg ? -2 : 0);
169     break;
170   case MAT_ROWS_SORTED:
171   case MAT_NEW_DIAGONALS:
172   case MAT_IGNORE_OFF_PROC_ENTRIES:
173   case MAT_USE_HASH_TABLE:
174     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
175     break;
176   case MAT_HERMITIAN:
177     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
178   case MAT_SYMMETRIC:
179   case MAT_STRUCTURALLY_SYMMETRIC:
180   case MAT_SYMMETRY_ETERNAL:
181     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
182     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
183     break;
184   case MAT_IGNORE_LOWER_TRIANGULAR:
185     a->ignore_ltriangular = flg;
186     break;
187   case MAT_ERROR_LOWER_TRIANGULAR:
188     a->ignore_ltriangular = flg;
189     break;
190   case MAT_GETROW_UPPERTRIANGULAR:
191     a->getrow_utriangular = flg;
192     break;
193   default:
194     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
201 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
202 {
203   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
204   PetscErrorCode ierr;
205   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
206   MatScalar      *aa,*aa_i;
207   PetscScalar    *v_i;
208 
209   PetscFunctionBegin;
210   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
211   /* Get the upper triangular part of the row */
212   bs  = A->rmap.bs;
213   ai  = a->i;
214   aj  = a->j;
215   aa  = a->a;
216   bs2 = a->bs2;
217 
218   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
219 
220   bn  = row/bs;   /* Block number */
221   bp  = row % bs; /* Block position */
222   M   = ai[bn+1] - ai[bn];
223   *ncols = bs*M;
224 
225   if (v) {
226     *v = 0;
227     if (*ncols) {
228       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
229       for (i=0; i<M; i++) { /* for each block in the block row */
230         v_i  = *v + i*bs;
231         aa_i = aa + bs2*(ai[bn] + i);
232         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
233       }
234     }
235   }
236 
237   if (cols) {
238     *cols = 0;
239     if (*ncols) {
240       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
241       for (i=0; i<M; i++) { /* for each block in the block row */
242         cols_i = *cols + i*bs;
243         itmp  = bs*aj[ai[bn] + i];
244         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
245       }
246     }
247   }
248 
249   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
250   /* this segment is currently removed, so only entries in the upper triangle are obtained */
251 #ifdef column_search
252   v_i    = *v    + M*bs;
253   cols_i = *cols + M*bs;
254   for (i=0; i<bn; i++){ /* for each block row */
255     M = ai[i+1] - ai[i];
256     for (j=0; j<M; j++){
257       itmp = aj[ai[i] + j];    /* block column value */
258       if (itmp == bn){
259         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
260         for (k=0; k<bs; k++) {
261           *cols_i++ = i*bs+k;
262           *v_i++    = aa_i[k];
263         }
264         *ncols += bs;
265         break;
266       }
267     }
268   }
269 #endif
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
275 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
276 {
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
281   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
287 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
288 {
289   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
290 
291   PetscFunctionBegin;
292   a->getrow_utriangular = PETSC_TRUE;
293   PetscFunctionReturn(0);
294 }
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
297 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
298 {
299   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
300 
301   PetscFunctionBegin;
302   a->getrow_utriangular = PETSC_FALSE;
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
308 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
309 {
310   PetscErrorCode ierr;
311   PetscFunctionBegin;
312   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
318 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
319 {
320   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
321   PetscErrorCode    ierr;
322   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
323   const char        *name;
324   PetscViewerFormat format;
325 
326   PetscFunctionBegin;
327   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
328   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
329   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
330     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
331   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
332     if (A->factor && bs>1){
333       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
334       PetscFunctionReturn(0);
335     }
336     Mat aij;
337     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
338     ierr = MatView(aij,viewer);CHKERRQ(ierr);
339     ierr = MatDestroy(aij);CHKERRQ(ierr);
340   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
341     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
342     for (i=0; i<a->mbs; i++) {
343       for (j=0; j<bs; j++) {
344         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
345         for (k=a->i[i]; k<a->i[i+1]; k++) {
346           for (l=0; l<bs; l++) {
347 #if defined(PETSC_USE_COMPLEX)
348             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
349               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
350                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
351             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
352               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
353                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
354             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
355               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
356             }
357 #else
358             if (a->a[bs2*k + l*bs + j] != 0.0) {
359               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
360             }
361 #endif
362           }
363         }
364         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
365       }
366     }
367     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
368   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
369      PetscFunctionReturn(0);
370   } else {
371     if (A->factor && bs>1){
372       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
373     }
374     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
375     for (i=0; i<a->mbs; i++) {
376       for (j=0; j<bs; j++) {
377         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
378         for (k=a->i[i]; k<a->i[i+1]; k++) {
379           for (l=0; l<bs; l++) {
380 #if defined(PETSC_USE_COMPLEX)
381             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
383                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
384             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
386                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
387             } else {
388               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             }
390 #else
391             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
392 #endif
393           }
394         }
395         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
396       }
397     }
398     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
399   }
400   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
406 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
407 {
408   Mat            A = (Mat) Aa;
409   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
410   PetscErrorCode ierr;
411   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
412   PetscMPIInt    rank;
413   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
414   MatScalar      *aa;
415   MPI_Comm       comm;
416   PetscViewer    viewer;
417 
418   PetscFunctionBegin;
419   /*
420     This is nasty. If this is called from an originally parallel matrix
421     then all processes call this,but only the first has the matrix so the
422     rest should return immediately.
423   */
424   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
426   if (rank) PetscFunctionReturn(0);
427 
428   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
429 
430   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
431   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
432 
433   /* loop over matrix elements drawing boxes */
434   color = PETSC_DRAW_BLUE;
435   for (i=0,row=0; i<mbs; i++,row+=bs) {
436     for (j=a->i[i]; j<a->i[i+1]; j++) {
437       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
438       x_l = a->j[j]*bs; x_r = x_l + 1.0;
439       aa = a->a + j*bs2;
440       for (k=0; k<bs; k++) {
441         for (l=0; l<bs; l++) {
442           if (PetscRealPart(*aa++) >=  0.) continue;
443           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
444         }
445       }
446     }
447   }
448   color = PETSC_DRAW_CYAN;
449   for (i=0,row=0; i<mbs; i++,row+=bs) {
450     for (j=a->i[i]; j<a->i[i+1]; j++) {
451       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
452       x_l = a->j[j]*bs; x_r = x_l + 1.0;
453       aa = a->a + j*bs2;
454       for (k=0; k<bs; k++) {
455         for (l=0; l<bs; l++) {
456           if (PetscRealPart(*aa++) != 0.) continue;
457           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
458         }
459       }
460     }
461   }
462 
463   color = PETSC_DRAW_RED;
464   for (i=0,row=0; i<mbs; i++,row+=bs) {
465     for (j=a->i[i]; j<a->i[i+1]; j++) {
466       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
467       x_l = a->j[j]*bs; x_r = x_l + 1.0;
468       aa = a->a + j*bs2;
469       for (k=0; k<bs; k++) {
470         for (l=0; l<bs; l++) {
471           if (PetscRealPart(*aa++) <= 0.) continue;
472           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
473         }
474       }
475     }
476   }
477   PetscFunctionReturn(0);
478 }
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
482 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
483 {
484   PetscErrorCode ierr;
485   PetscReal      xl,yl,xr,yr,w,h;
486   PetscDraw      draw;
487   PetscTruth     isnull;
488 
489   PetscFunctionBegin;
490   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
491   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
492 
493   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
494   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
495   xr += w;    yr += h;  xl = -w;     yl = -h;
496   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
497   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
498   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatView_SeqSBAIJ"
504 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
505 {
506   PetscErrorCode ierr;
507   PetscTruth     iascii,isdraw;
508 
509   PetscFunctionBegin;
510   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
511   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
512   if (iascii){
513     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
514   } else if (isdraw) {
515     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
516   } else {
517     Mat B;
518     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
519     ierr = MatView(B,viewer);CHKERRQ(ierr);
520     ierr = MatDestroy(B);CHKERRQ(ierr);
521   }
522   PetscFunctionReturn(0);
523 }
524 
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
528 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
529 {
530   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
531   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
532   PetscInt     *ai = a->i,*ailen = a->ilen;
533   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
534   MatScalar    *ap,*aa = a->a;
535 
536   PetscFunctionBegin;
537   for (k=0; k<m; k++) { /* loop over rows */
538     row  = im[k]; brow = row/bs;
539     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
540     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
541     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
542     nrow = ailen[brow];
543     for (l=0; l<n; l++) { /* loop over columns */
544       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
545       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
546       col  = in[l] ;
547       bcol = col/bs;
548       cidx = col%bs;
549       ridx = row%bs;
550       high = nrow;
551       low  = 0; /* assume unsorted */
552       while (high-low > 5) {
553         t = (low+high)/2;
554         if (rp[t] > bcol) high = t;
555         else             low  = t;
556       }
557       for (i=low; i<high; i++) {
558         if (rp[i] > bcol) break;
559         if (rp[i] == bcol) {
560           *v++ = ap[bs2*i+bs*cidx+ridx];
561           goto finished;
562         }
563       }
564       *v++ = 0.0;
565       finished:;
566     }
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
574 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
575 {
576   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
577   PetscErrorCode  ierr;
578   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
579   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
580   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
581   PetscTruth      roworiented=a->roworiented;
582   const MatScalar *value = v;
583   MatScalar       *ap,*aa = a->a,*bap;
584 
585   PetscFunctionBegin;
586   if (roworiented) {
587     stepval = (n-1)*bs;
588   } else {
589     stepval = (m-1)*bs;
590   }
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     if (row < 0) continue;
594 #if defined(PETSC_USE_DEBUG)
595     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
596 #endif
597     rp   = aj + ai[row];
598     ap   = aa + bs2*ai[row];
599     rmax = imax[row];
600     nrow = ailen[row];
601     low  = 0;
602     high = nrow;
603     for (l=0; l<n; l++) { /* loop over added columns */
604       if (in[l] < 0) continue;
605       col = in[l];
606 #if defined(PETSC_USE_DEBUG)
607       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
608 #endif
609       if (col < row) continue; /* ignore lower triangular block */
610       if (roworiented) {
611         value = v + k*(stepval+bs)*bs + l*bs;
612       } else {
613         value = v + l*(stepval+bs)*bs + k*bs;
614       }
615       if (col <= lastcol) low = 0; else high = nrow;
616       lastcol = col;
617       while (high-low > 7) {
618         t = (low+high)/2;
619         if (rp[t] > col) high = t;
620         else             low  = t;
621       }
622       for (i=low; i<high; i++) {
623         if (rp[i] > col) break;
624         if (rp[i] == col) {
625           bap  = ap +  bs2*i;
626           if (roworiented) {
627             if (is == ADD_VALUES) {
628               for (ii=0; ii<bs; ii++,value+=stepval) {
629                 for (jj=ii; jj<bs2; jj+=bs) {
630                   bap[jj] += *value++;
631                 }
632               }
633             } else {
634               for (ii=0; ii<bs; ii++,value+=stepval) {
635                 for (jj=ii; jj<bs2; jj+=bs) {
636                   bap[jj] = *value++;
637                 }
638                }
639             }
640           } else {
641             if (is == ADD_VALUES) {
642               for (ii=0; ii<bs; ii++,value+=stepval) {
643                 for (jj=0; jj<bs; jj++) {
644                   *bap++ += *value++;
645                 }
646               }
647             } else {
648               for (ii=0; ii<bs; ii++,value+=stepval) {
649                 for (jj=0; jj<bs; jj++) {
650                   *bap++  = *value++;
651                 }
652               }
653             }
654           }
655           goto noinsert2;
656         }
657       }
658       if (nonew == 1) goto noinsert2;
659       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
660       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
661       N = nrow++ - 1; high++;
662       /* shift up all the later entries in this row */
663       for (ii=N; ii>=i; ii--) {
664         rp[ii+1] = rp[ii];
665         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
666       }
667       if (N >= i) {
668         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
669       }
670       rp[i] = col;
671       bap   = ap +  bs2*i;
672       if (roworiented) {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=ii; jj<bs2; jj+=bs) {
675             bap[jj] = *value++;
676           }
677         }
678       } else {
679         for (ii=0; ii<bs; ii++,value+=stepval) {
680           for (jj=0; jj<bs; jj++) {
681             *bap++  = *value++;
682           }
683         }
684        }
685     noinsert2:;
686       low = i;
687     }
688     ailen[row] = nrow;
689   }
690    PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
696 {
697   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
698   PetscErrorCode ierr;
699   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
700   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
701   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
702   MatScalar      *aa = a->a,*ap;
703 
704   PetscFunctionBegin;
705   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
706 
707   if (m) rmax = ailen[0];
708   for (i=1; i<mbs; i++) {
709     /* move each row back by the amount of empty slots (fshift) before it*/
710     fshift += imax[i-1] - ailen[i-1];
711      rmax   = PetscMax(rmax,ailen[i]);
712      if (fshift) {
713        ip = aj + ai[i]; ap = aa + bs2*ai[i];
714        N = ailen[i];
715        for (j=0; j<N; j++) {
716          ip[j-fshift] = ip[j];
717          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
718        }
719      }
720      ai[i] = ai[i-1] + ailen[i-1];
721   }
722   if (mbs) {
723     fshift += imax[mbs-1] - ailen[mbs-1];
724      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
725   }
726   /* reset ilen and imax for each row */
727   for (i=0; i<mbs; i++) {
728     ailen[i] = imax[i] = ai[i+1] - ai[i];
729   }
730   a->nz = ai[mbs];
731 
732   /* diagonals may have moved, reset it */
733   if (a->diag) {
734     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
735   }
736   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
737   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
738   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
739   a->reallocs          = 0;
740   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
741   PetscFunctionReturn(0);
742 }
743 
744 /*
745    This function returns an array of flags which indicate the locations of contiguous
746    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
747    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
748    Assume: sizes should be long enough to hold all the values.
749 */
750 #undef __FUNCT__
751 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
752 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
753 {
754   PetscInt   i,j,k,row;
755   PetscTruth flg;
756 
757   PetscFunctionBegin;
758    for (i=0,j=0; i<n; j++) {
759      row = idx[i];
760      if (row%bs!=0) { /* Not the begining of a block */
761        sizes[j] = 1;
762        i++;
763      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
764        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
765        i++;
766      } else { /* Begining of the block, so check if the complete block exists */
767        flg = PETSC_TRUE;
768        for (k=1; k<bs; k++) {
769          if (row+k != idx[i+k]) { /* break in the block */
770            flg = PETSC_FALSE;
771            break;
772          }
773        }
774        if (flg) { /* No break in the bs */
775          sizes[j] = bs;
776          i+= bs;
777        } else {
778          sizes[j] = 1;
779          i++;
780        }
781      }
782    }
783    *bs_max = j;
784    PetscFunctionReturn(0);
785 }
786 
787 
788 /* Only add/insert a(i,j) with i<=j (blocks).
789    Any a(i,j) with i>j input by user is ingored.
790 */
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
794 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
795 {
796   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
797   PetscErrorCode ierr;
798   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
799   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
800   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
801   PetscInt       ridx,cidx,bs2=a->bs2;
802   MatScalar      *ap,value,*aa=a->a,*bap;
803 
804   PetscFunctionBegin;
805   for (k=0; k<m; k++) { /* loop over added rows */
806     row  = im[k];       /* row number */
807     brow = row/bs;      /* block row number */
808     if (row < 0) continue;
809 #if defined(PETSC_USE_DEBUG)
810     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
811 #endif
812     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
813     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
814     rmax = imax[brow];  /* maximum space allocated for this row */
815     nrow = ailen[brow]; /* actual length of this row */
816     low  = 0;
817 
818     for (l=0; l<n; l++) { /* loop over added columns */
819       if (in[l] < 0) continue;
820 #if defined(PETSC_USE_DEBUG)
821       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
822 #endif
823       col = in[l];
824       bcol = col/bs;              /* block col number */
825 
826       if (brow > bcol) {
827         if (a->ignore_ltriangular){
828           continue; /* ignore lower triangular values */
829         } else {
830           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
831         }
832       }
833 
834       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
835       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
836         /* element value a(k,l) */
837         if (roworiented) {
838           value = v[l + k*n];
839         } else {
840           value = v[k + l*m];
841         }
842 
843         /* move pointer bap to a(k,l) quickly and add/insert value */
844         if (col <= lastcol) low = 0; high = nrow;
845         lastcol = col;
846         while (high-low > 7) {
847           t = (low+high)/2;
848           if (rp[t] > bcol) high = t;
849           else              low  = t;
850         }
851         for (i=low; i<high; i++) {
852           if (rp[i] > bcol) break;
853           if (rp[i] == bcol) {
854             bap  = ap +  bs2*i + bs*cidx + ridx;
855             if (is == ADD_VALUES) *bap += value;
856             else                  *bap  = value;
857             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
858             if (brow == bcol && ridx < cidx){
859               bap  = ap +  bs2*i + bs*ridx + cidx;
860               if (is == ADD_VALUES) *bap += value;
861               else                  *bap  = value;
862             }
863             goto noinsert1;
864           }
865         }
866 
867         if (nonew == 1) goto noinsert1;
868         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
869         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
870 
871         N = nrow++ - 1; high++;
872         /* shift up all the later entries in this row */
873         for (ii=N; ii>=i; ii--) {
874           rp[ii+1] = rp[ii];
875           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
876         }
877         if (N>=i) {
878           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
879         }
880         rp[i]                      = bcol;
881         ap[bs2*i + bs*cidx + ridx] = value;
882       noinsert1:;
883         low = i;
884       }
885     }   /* end of loop over added columns */
886     ailen[brow] = nrow;
887   }   /* end of loop over added rows */
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
893 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
894 {
895   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
896   Mat            outA;
897   PetscErrorCode ierr;
898   PetscTruth     row_identity;
899 
900   PetscFunctionBegin;
901   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
902   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
903   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
904   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
905 
906   outA        = inA;
907   inA->factor = FACTOR_CHOLESKY;
908 
909   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
910   /*
911     Blocksize < 8 have a special faster factorization/solver
912     for ICC(0) factorization with natural ordering
913   */
914   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
915   case 1:
916     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
917     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
918     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
919     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
920     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
921     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
922     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
923     break;
924   case 2:
925     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
926     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
927     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
928     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
929     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
930     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
931     break;
932   case 3:
933      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
934      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
935      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
936      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
937      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
938      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
939      break;
940   case 4:
941     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
942     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
943     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
944     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
945     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
946     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
947     break;
948   case 5:
949     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
950     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
951     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
952     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
953     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
954     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
955     break;
956   case 6:
957     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
958     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
959     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
960     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
961     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
962     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
963     break;
964   case 7:
965     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
966     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
967     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
968     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
969     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
970     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
971     break;
972   default:
973     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
974     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
975     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
976     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
977     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
978     break;
979   }
980 
981   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
982   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
983   a->row = row;
984   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
985   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
986   a->col = row;
987 
988   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
989   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
990   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
991 
992   if (!a->solve_work) {
993     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
994     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
995   }
996 
997   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 EXTERN_C_BEGIN
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1004 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1005 {
1006   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1007   PetscInt     i,nz,n;
1008 
1009   PetscFunctionBegin;
1010   nz = baij->maxnz;
1011   n  = mat->cmap.n;
1012   for (i=0; i<nz; i++) {
1013     baij->j[i] = indices[i];
1014   }
1015    baij->nz = nz;
1016    for (i=0; i<n; i++) {
1017      baij->ilen[i] = baij->imax[i];
1018    }
1019    PetscFunctionReturn(0);
1020 }
1021 EXTERN_C_END
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1025 /*@
1026   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1027   in the matrix.
1028 
1029   Input Parameters:
1030   +  mat     - the SeqSBAIJ matrix
1031   -  indices - the column indices
1032 
1033   Level: advanced
1034 
1035   Notes:
1036   This can be called if you have precomputed the nonzero structure of the
1037   matrix and want to provide it to the matrix object to improve the performance
1038   of the MatSetValues() operation.
1039 
1040   You MUST have set the correct numbers of nonzeros per row in the call to
1041   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1042 
1043   MUST be called before any calls to MatSetValues()
1044 
1045   .seealso: MatCreateSeqSBAIJ
1046 @*/
1047 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1048 {
1049   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1050 
1051   PetscFunctionBegin;
1052   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1053   PetscValidPointer(indices,2);
1054   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1055   if (f) {
1056     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1057   } else {
1058     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1065 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1066 {
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* If the two matrices have the same copy implementation, use fast copy. */
1071   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1072     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1073     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1074 
1075     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1076       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1077     }
1078     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1079   } else {
1080     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1081     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1082     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1089 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1090 {
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1100 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1101 {
1102   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1103   PetscFunctionBegin;
1104   *array = a->a;
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1110 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1111 {
1112   PetscFunctionBegin;
1113   PetscFunctionReturn(0);
1114  }
1115 
1116 #include "petscblaslapack.h"
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1119 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1120 {
1121   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1122   PetscErrorCode ierr;
1123   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1124   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1125 
1126   PetscFunctionBegin;
1127   if (str == SAME_NONZERO_PATTERN) {
1128     PetscScalar alpha = a;
1129     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1130   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1131     if (y->xtoy && y->XtoY != X) {
1132       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1133       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1134     }
1135     if (!y->xtoy) { /* get xtoy */
1136       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1137       y->XtoY = X;
1138     }
1139     bs2 = bs*bs;
1140     for (i=0; i<x->nz; i++) {
1141       j = 0;
1142       while (j < bs2){
1143         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1144         j++;
1145       }
1146     }
1147     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1148   } else {
1149     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1150     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1151     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1152   }
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1158 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1159 {
1160   PetscFunctionBegin;
1161   *flg = PETSC_TRUE;
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1167 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1168 {
1169    PetscFunctionBegin;
1170    *flg = PETSC_TRUE;
1171    PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1176 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1177  {
1178    PetscFunctionBegin;
1179    *flg = PETSC_FALSE;
1180    PetscFunctionReturn(0);
1181  }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1185 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1186 {
1187   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1188   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1189   PetscScalar    *aa = a->a;
1190 
1191   PetscFunctionBegin;
1192   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1198 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1199 {
1200   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1201   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1202   PetscScalar    *aa = a->a;
1203 
1204   PetscFunctionBegin;
1205   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /* -------------------------------------------------------------------*/
1210 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1211        MatGetRow_SeqSBAIJ,
1212        MatRestoreRow_SeqSBAIJ,
1213        MatMult_SeqSBAIJ_N,
1214 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1215        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1216        MatMultAdd_SeqSBAIJ_N,
1217        MatSolve_SeqSBAIJ_N,
1218        0,
1219        0,
1220 /*10*/ 0,
1221        0,
1222        MatCholeskyFactor_SeqSBAIJ,
1223        MatRelax_SeqSBAIJ,
1224        MatTranspose_SeqSBAIJ,
1225 /*15*/ MatGetInfo_SeqSBAIJ,
1226        MatEqual_SeqSBAIJ,
1227        MatGetDiagonal_SeqSBAIJ,
1228        MatDiagonalScale_SeqSBAIJ,
1229        MatNorm_SeqSBAIJ,
1230 /*20*/ 0,
1231        MatAssemblyEnd_SeqSBAIJ,
1232        0,
1233        MatSetOption_SeqSBAIJ,
1234        MatZeroEntries_SeqSBAIJ,
1235 /*25*/ 0,
1236        0,
1237        0,
1238        MatCholeskyFactorSymbolic_SeqSBAIJ,
1239        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1240 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1241        0,
1242        MatICCFactorSymbolic_SeqSBAIJ,
1243        MatGetArray_SeqSBAIJ,
1244        MatRestoreArray_SeqSBAIJ,
1245 /*35*/ MatDuplicate_SeqSBAIJ,
1246        MatForwardSolve_SeqSBAIJ_N,
1247        MatBackwardSolve_SeqSBAIJ_N,
1248        0,
1249        MatICCFactor_SeqSBAIJ,
1250 /*40*/ MatAXPY_SeqSBAIJ,
1251        MatGetSubMatrices_SeqSBAIJ,
1252        MatIncreaseOverlap_SeqSBAIJ,
1253        MatGetValues_SeqSBAIJ,
1254        MatCopy_SeqSBAIJ,
1255 /*45*/ 0,
1256        MatScale_SeqSBAIJ,
1257        0,
1258        0,
1259        0,
1260 /*50*/ 0,
1261        MatGetRowIJ_SeqSBAIJ,
1262        MatRestoreRowIJ_SeqSBAIJ,
1263        0,
1264        0,
1265 /*55*/ 0,
1266        0,
1267        0,
1268        0,
1269        MatSetValuesBlocked_SeqSBAIJ,
1270 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1271        0,
1272        0,
1273        0,
1274        0,
1275 /*65*/ 0,
1276        0,
1277        0,
1278        0,
1279        0,
1280 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1281        0,
1282        0,
1283        0,
1284        0,
1285 /*75*/ 0,
1286        0,
1287        0,
1288        0,
1289        0,
1290 /*80*/ 0,
1291        0,
1292        0,
1293 #if !defined(PETSC_USE_COMPLEX)
1294        MatGetInertia_SeqSBAIJ,
1295 #else
1296        0,
1297 #endif
1298        MatLoad_SeqSBAIJ,
1299 /*85*/ MatIsSymmetric_SeqSBAIJ,
1300        MatIsHermitian_SeqSBAIJ,
1301        MatIsStructurallySymmetric_SeqSBAIJ,
1302        0,
1303        0,
1304 /*90*/ 0,
1305        0,
1306        0,
1307        0,
1308        0,
1309 /*95*/ 0,
1310        0,
1311        0,
1312        0,
1313        0,
1314 /*100*/0,
1315        0,
1316        0,
1317        0,
1318        0,
1319 /*105*/0,
1320        MatRealPart_SeqSBAIJ,
1321        MatImaginaryPart_SeqSBAIJ,
1322        MatGetRowUpperTriangular_SeqSBAIJ,
1323        MatRestoreRowUpperTriangular_SeqSBAIJ
1324 };
1325 
1326 EXTERN_C_BEGIN
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1329 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1330 {
1331   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1332   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   if (aij->nonew != 1) {
1337     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first");
1338   }
1339 
1340   /* allocate space for values if not already there */
1341   if (!aij->saved_values) {
1342     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1343   }
1344 
1345   /* copy values over */
1346   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 EXTERN_C_END
1350 
1351 EXTERN_C_BEGIN
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1354 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1355 {
1356   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1357   PetscErrorCode ierr;
1358   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1359 
1360   PetscFunctionBegin;
1361   if (aij->nonew != 1) {
1362     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first");
1363   }
1364   if (!aij->saved_values) {
1365     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1366   }
1367 
1368   /* copy values over */
1369   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 EXTERN_C_END
1373 
1374 EXTERN_C_BEGIN
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1377 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1378 {
1379   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1380   PetscErrorCode ierr;
1381   PetscInt       i,mbs,bs2;
1382   PetscTruth     skipallocation = PETSC_FALSE,flg;
1383 
1384   PetscFunctionBegin;
1385   B->preallocated = PETSC_TRUE;
1386   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1387   B->rmap.bs = B->cmap.bs = bs;
1388   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1389   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1390 
1391   mbs  = B->rmap.N/bs;
1392   bs2  = bs*bs;
1393 
1394   if (mbs*bs != B->rmap.N) {
1395     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1396   }
1397 
1398   if (nz == MAT_SKIP_ALLOCATION) {
1399     skipallocation = PETSC_TRUE;
1400     nz             = 0;
1401   }
1402 
1403   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1404   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1405   if (nnz) {
1406     for (i=0; i<mbs; i++) {
1407       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1408       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1409     }
1410   }
1411 
1412   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1413   if (!flg) {
1414     switch (bs) {
1415     case 1:
1416       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1417       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1418       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1419       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1420       B->ops->mult             = MatMult_SeqSBAIJ_1;
1421       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1422       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1423       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1424       break;
1425     case 2:
1426       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1427       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1428       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1429       B->ops->mult             = MatMult_SeqSBAIJ_2;
1430       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1431       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1432       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1433       break;
1434     case 3:
1435       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1436       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1437       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1438       B->ops->mult             = MatMult_SeqSBAIJ_3;
1439       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1440       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1441       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1442       break;
1443     case 4:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1445       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1446       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1447       B->ops->mult             = MatMult_SeqSBAIJ_4;
1448       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1449       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1450       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1451       break;
1452     case 5:
1453       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1454       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1455       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1456       B->ops->mult             = MatMult_SeqSBAIJ_5;
1457       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1458       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1459       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1460       break;
1461     case 6:
1462       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1463       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1464       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1465       B->ops->mult             = MatMult_SeqSBAIJ_6;
1466       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1467       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1468       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1469       break;
1470     case 7:
1471       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1472       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1473       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1474       B->ops->mult             = MatMult_SeqSBAIJ_7;
1475       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1476       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1477       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1478       break;
1479     }
1480   }
1481 
1482   b->mbs = mbs;
1483   b->nbs = mbs;
1484   if (!skipallocation) {
1485     /* b->ilen will count nonzeros in each block row so far. */
1486     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1487     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1488     if (!nnz) {
1489       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1490       else if (nz <= 0)        nz = 1;
1491       for (i=0; i<mbs; i++) {
1492         b->imax[i] = nz;
1493       }
1494       nz = nz*mbs; /* total nz */
1495     } else {
1496       nz = 0;
1497       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1498     }
1499     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1500 
1501     /* allocate the matrix space */
1502     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1503     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1504     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1505     b->singlemalloc = PETSC_TRUE;
1506 
1507     /* pointer to beginning of each row */
1508     b->i[0] = 0;
1509     for (i=1; i<mbs+1; i++) {
1510       b->i[i] = b->i[i-1] + b->imax[i-1];
1511     }
1512     b->free_a     = PETSC_TRUE;
1513     b->free_ij    = PETSC_TRUE;
1514   } else {
1515     b->free_a     = PETSC_FALSE;
1516     b->free_ij    = PETSC_FALSE;
1517   }
1518 
1519   B->rmap.bs               = bs;
1520   b->bs2              = bs2;
1521   b->nz             = 0;
1522   b->maxnz          = nz*bs2;
1523 
1524   b->inew             = 0;
1525   b->jnew             = 0;
1526   b->anew             = 0;
1527   b->a2anew           = 0;
1528   b->permute          = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 EXTERN_C_END
1532 
1533 EXTERN_C_BEGIN
1534 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1535 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1536 EXTERN_C_END
1537 
1538 /*MC
1539   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1540   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1541 
1542   Options Database Keys:
1543   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1544 
1545   Level: beginner
1546 
1547   .seealso: MatCreateSeqSBAIJ
1548 M*/
1549 
1550 EXTERN_C_BEGIN
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1554 {
1555   Mat_SeqSBAIJ   *b;
1556   PetscErrorCode ierr;
1557   PetscMPIInt    size;
1558   PetscTruth     flg;
1559 
1560   PetscFunctionBegin;
1561   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1562   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1563 
1564   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1565   B->data = (void*)b;
1566   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1567   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1568   B->ops->view        = MatView_SeqSBAIJ;
1569   B->factor           = 0;
1570   B->mapping          = 0;
1571   b->row              = 0;
1572   b->icol             = 0;
1573   b->reallocs         = 0;
1574   b->saved_values     = 0;
1575 
1576 
1577   b->sorted           = PETSC_FALSE;
1578   b->roworiented      = PETSC_TRUE;
1579   b->nonew            = 0;
1580   b->diag             = 0;
1581   b->solve_work       = 0;
1582   b->mult_work        = 0;
1583   B->spptr            = 0;
1584   b->keepzeroedrows   = PETSC_FALSE;
1585   b->xtoy             = 0;
1586   b->XtoY             = 0;
1587 
1588   b->inew             = 0;
1589   b->jnew             = 0;
1590   b->anew             = 0;
1591   b->a2anew           = 0;
1592   b->permute          = PETSC_FALSE;
1593 
1594   b->ignore_ltriangular = PETSC_FALSE;
1595   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1596   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1597 
1598   b->getrow_utriangular = PETSC_FALSE;
1599   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1600   if (flg) b->getrow_utriangular = PETSC_TRUE;
1601 
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1603                                      "MatStoreValues_SeqSBAIJ",
1604                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1606                                      "MatRetrieveValues_SeqSBAIJ",
1607                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1609                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1610                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1612                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1613                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1615                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1616                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1618                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1619                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1620 
1621   B->symmetric                  = PETSC_TRUE;
1622   B->structurally_symmetric     = PETSC_TRUE;
1623   B->symmetric_set              = PETSC_TRUE;
1624   B->structurally_symmetric_set = PETSC_TRUE;
1625   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1632 /*@C
1633    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1634    compressed row) format.  For good matrix assembly performance the
1635    user should preallocate the matrix storage by setting the parameter nz
1636    (or the array nnz).  By setting these parameters accurately, performance
1637    during matrix assembly can be increased by more than a factor of 50.
1638 
1639    Collective on Mat
1640 
1641    Input Parameters:
1642 +  A - the symmetric matrix
1643 .  bs - size of block
1644 .  nz - number of block nonzeros per block row (same for all rows)
1645 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1646          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1647 
1648    Options Database Keys:
1649 .   -mat_no_unroll - uses code that does not unroll the loops in the
1650                      block calculations (much slower)
1651 .    -mat_block_size - size of the blocks to use
1652 
1653    Level: intermediate
1654 
1655    Notes:
1656    Specify the preallocated storage with either nz or nnz (not both).
1657    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1658    allocation.  For additional details, see the users manual chapter on
1659    matrices.
1660 
1661    If the nnz parameter is given then the nz parameter is ignored
1662 
1663 
1664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1665 @*/
1666 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1667 {
1668   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1669 
1670   PetscFunctionBegin;
1671   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1672   if (f) {
1673     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatCreateSeqSBAIJ"
1680 /*@C
1681    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1682    compressed row) format.  For good matrix assembly performance the
1683    user should preallocate the matrix storage by setting the parameter nz
1684    (or the array nnz).  By setting these parameters accurately, performance
1685    during matrix assembly can be increased by more than a factor of 50.
1686 
1687    Collective on MPI_Comm
1688 
1689    Input Parameters:
1690 +  comm - MPI communicator, set to PETSC_COMM_SELF
1691 .  bs - size of block
1692 .  m - number of rows, or number of columns
1693 .  nz - number of block nonzeros per block row (same for all rows)
1694 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1695          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1696 
1697    Output Parameter:
1698 .  A - the symmetric matrix
1699 
1700    Options Database Keys:
1701 .   -mat_no_unroll - uses code that does not unroll the loops in the
1702                      block calculations (much slower)
1703 .    -mat_block_size - size of the blocks to use
1704 
1705    Level: intermediate
1706 
1707    Notes:
1708    The number of rows and columns must be divisible by blocksize.
1709    This matrix type does not support complex Hermitian operation.
1710 
1711    Specify the preallocated storage with either nz or nnz (not both).
1712    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1713    allocation.  For additional details, see the users manual chapter on
1714    matrices.
1715 
1716    If the nnz parameter is given then the nz parameter is ignored
1717 
1718 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1719 @*/
1720 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1721 {
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1726   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1727   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1728   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1734 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1735 {
1736   Mat            C;
1737   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1738   PetscErrorCode ierr;
1739   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1740 
1741   PetscFunctionBegin;
1742   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1743 
1744   *B = 0;
1745   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1746   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1747   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1748   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1749   c    = (Mat_SeqSBAIJ*)C->data;
1750 
1751   C->preallocated   = PETSC_TRUE;
1752   C->factor         = A->factor;
1753   c->row            = 0;
1754   c->icol           = 0;
1755   c->saved_values   = 0;
1756   c->keepzeroedrows = a->keepzeroedrows;
1757   C->assembled      = PETSC_TRUE;
1758 
1759   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1760   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1761   c->bs2  = a->bs2;
1762   c->mbs  = a->mbs;
1763   c->nbs  = a->nbs;
1764 
1765   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1766   for (i=0; i<mbs; i++) {
1767     c->imax[i] = a->imax[i];
1768     c->ilen[i] = a->ilen[i];
1769   }
1770 
1771   /* allocate the matrix space */
1772   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1773   c->singlemalloc = PETSC_TRUE;
1774   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1776   if (mbs > 0) {
1777     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1778     if (cpvalues == MAT_COPY_VALUES) {
1779       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1780     } else {
1781       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1782     }
1783   }
1784 
1785   c->sorted      = a->sorted;
1786   c->roworiented = a->roworiented;
1787   c->nonew       = a->nonew;
1788 
1789   if (a->diag) {
1790     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1791     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1792     for (i=0; i<mbs; i++) {
1793       c->diag[i] = a->diag[i];
1794     }
1795   } else c->diag  = 0;
1796   c->nz           = a->nz;
1797   c->maxnz        = a->maxnz;
1798   c->solve_work   = 0;
1799   c->mult_work    = 0;
1800   c->free_a       = PETSC_TRUE;
1801   c->free_ij      = PETSC_TRUE;
1802   *B = C;
1803   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1809 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1810 {
1811   Mat_SeqSBAIJ   *a;
1812   Mat            B;
1813   PetscErrorCode ierr;
1814   int            fd;
1815   PetscMPIInt    size;
1816   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1817   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1818   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1819   PetscInt       *masked,nmask,tmp,bs2,ishift;
1820   PetscScalar    *aa;
1821   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1822 
1823   PetscFunctionBegin;
1824   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1825   bs2  = bs*bs;
1826 
1827   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1828   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1829   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1830   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1831   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1832   M = header[1]; N = header[2]; nz = header[3];
1833 
1834   if (header[3] < 0) {
1835     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1836   }
1837 
1838   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1839 
1840   /*
1841      This code adds extra rows to make sure the number of rows is
1842     divisible by the blocksize
1843   */
1844   mbs        = M/bs;
1845   extra_rows = bs - M + bs*(mbs);
1846   if (extra_rows == bs) extra_rows = 0;
1847   else                  mbs++;
1848   if (extra_rows) {
1849     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1850   }
1851 
1852   /* read in row lengths */
1853   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1854   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1855   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1856 
1857   /* read in column indices */
1858   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1859   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1860   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1861 
1862   /* loop over row lengths determining block row lengths */
1863   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1864   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1865   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1866   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1867   masked   = mask + mbs;
1868   rowcount = 0; nzcount = 0;
1869   for (i=0; i<mbs; i++) {
1870     nmask = 0;
1871     for (j=0; j<bs; j++) {
1872       kmax = rowlengths[rowcount];
1873       for (k=0; k<kmax; k++) {
1874         tmp = jj[nzcount++]/bs;   /* block col. index */
1875         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1876       }
1877       rowcount++;
1878     }
1879     s_browlengths[i] += nmask;
1880 
1881     /* zero out the mask elements we set */
1882     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1883   }
1884 
1885   /* create our matrix */
1886   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1887   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1888   ierr = MatSetType(B,type);CHKERRQ(ierr);
1889   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1890   a = (Mat_SeqSBAIJ*)B->data;
1891 
1892   /* set matrix "i" values */
1893   a->i[0] = 0;
1894   for (i=1; i<= mbs; i++) {
1895     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1896     a->ilen[i-1] = s_browlengths[i-1];
1897   }
1898   a->nz = a->i[mbs];
1899 
1900   /* read in nonzero values */
1901   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1902   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1903   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1904 
1905   /* set "a" and "j" values into matrix */
1906   nzcount = 0; jcount = 0;
1907   for (i=0; i<mbs; i++) {
1908     nzcountb = nzcount;
1909     nmask    = 0;
1910     for (j=0; j<bs; j++) {
1911       kmax = rowlengths[i*bs+j];
1912       for (k=0; k<kmax; k++) {
1913         tmp = jj[nzcount++]/bs; /* block col. index */
1914         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1915       }
1916     }
1917     /* sort the masked values */
1918     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1919 
1920     /* set "j" values into matrix */
1921     maskcount = 1;
1922     for (j=0; j<nmask; j++) {
1923       a->j[jcount++]  = masked[j];
1924       mask[masked[j]] = maskcount++;
1925     }
1926 
1927     /* set "a" values into matrix */
1928     ishift = bs2*a->i[i];
1929     for (j=0; j<bs; j++) {
1930       kmax = rowlengths[i*bs+j];
1931       for (k=0; k<kmax; k++) {
1932         tmp       = jj[nzcountb]/bs ; /* block col. index */
1933         if (tmp >= i){
1934           block     = mask[tmp] - 1;
1935           point     = jj[nzcountb] - bs*tmp;
1936           idx       = ishift + bs2*block + j + bs*point;
1937           a->a[idx] = aa[nzcountb];
1938         }
1939         nzcountb++;
1940       }
1941     }
1942     /* zero out the mask elements we set */
1943     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1944   }
1945   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1946 
1947   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1948   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1949   ierr = PetscFree(aa);CHKERRQ(ierr);
1950   ierr = PetscFree(jj);CHKERRQ(ierr);
1951   ierr = PetscFree(mask);CHKERRQ(ierr);
1952 
1953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955   ierr = MatView_Private(B);CHKERRQ(ierr);
1956   *A = B;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1962 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1963 {
1964   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1965   MatScalar      *aa=a->a,*v,*v1;
1966   PetscScalar    *x,*b,*t,sum,d;
1967   PetscErrorCode ierr;
1968   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1969   PetscInt       nz,nz1,*vj,*vj1,i;
1970 
1971   PetscFunctionBegin;
1972   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1973 
1974   its = its*lits;
1975   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1976 
1977   if (bs > 1)
1978     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1979 
1980   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1981   if (xx != bb) {
1982     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1983   } else {
1984     b = x;
1985   }
1986 
1987   if (!a->relax_work) {
1988     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
1989   }
1990   t = a->relax_work;
1991 
1992   if (flag & SOR_ZERO_INITIAL_GUESS) {
1993     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1994       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
1995 
1996       for (i=0; i<m; i++){
1997         d  = *(aa + ai[i]);  /* diag[i] */
1998         v  = aa + ai[i] + 1;
1999         vj = aj + ai[i] + 1;
2000         nz = ai[i+1] - ai[i] - 1;
2001         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2002         x[i] = omega*t[i]/d;
2003         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2004       }
2005     }
2006 
2007     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2008       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2009         t = b;
2010       }
2011 
2012       for (i=m-1; i>=0; i--){
2013         d  = *(aa + ai[i]);
2014         v  = aa + ai[i] + 1;
2015         vj = aj + ai[i] + 1;
2016         nz = ai[i+1] - ai[i] - 1;
2017         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2018         sum = t[i];
2019         while (nz--) sum -= x[*vj++]*(*v++);
2020         x[i] =   (1-omega)*x[i] + omega*sum/d;
2021       }
2022       t = a->relax_work;
2023     }
2024     its--;
2025   }
2026 
2027   while (its--) {
2028     /*
2029        forward sweep:
2030        for i=0,...,m-1:
2031          sum[i] = (b[i] - U(i,:)x )/d[i];
2032          x[i]   = (1-omega)x[i] + omega*sum[i];
2033          b      = b - x[i]*U^T(i,:);
2034 
2035     */
2036     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2037       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2038 
2039       for (i=0; i<m; i++){
2040         d  = *(aa + ai[i]);  /* diag[i] */
2041         v  = aa + ai[i] + 1; v1=v;
2042         vj = aj + ai[i] + 1; vj1=vj;
2043         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2044         sum = t[i];
2045         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2046         while (nz1--) sum -= (*v1++)*x[*vj1++];
2047         x[i] = (1-omega)*x[i] + omega*sum/d;
2048         while (nz--) t[*vj++] -= x[i]*(*v++);
2049       }
2050     }
2051 
2052     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2053       /*
2054        backward sweep:
2055        b = b - x[i]*U^T(i,:), i=0,...,n-2
2056        for i=m-1,...,0:
2057          sum[i] = (b[i] - U(i,:)x )/d[i];
2058          x[i]   = (1-omega)x[i] + omega*sum[i];
2059       */
2060       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2061       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2062 
2063       for (i=0; i<m-1; i++){  /* update rhs */
2064         v  = aa + ai[i] + 1;
2065         vj = aj + ai[i] + 1;
2066         nz = ai[i+1] - ai[i] - 1;
2067         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2068         while (nz--) t[*vj++] -= x[i]*(*v++);
2069       }
2070       for (i=m-1; i>=0; i--){
2071         d  = *(aa + ai[i]);
2072         v  = aa + ai[i] + 1;
2073         vj = aj + ai[i] + 1;
2074         nz = ai[i+1] - ai[i] - 1;
2075         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2076         sum = t[i];
2077         while (nz--) sum -= x[*vj++]*(*v++);
2078         x[i] =   (1-omega)*x[i] + omega*sum/d;
2079       }
2080     }
2081   }
2082 
2083   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2084   if (bb != xx) {
2085     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2092 /*@
2093      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2094               (upper triangular entries in CSR format) provided by the user.
2095 
2096      Collective on MPI_Comm
2097 
2098    Input Parameters:
2099 +  comm - must be an MPI communicator of size 1
2100 .  bs - size of block
2101 .  m - number of rows
2102 .  n - number of columns
2103 .  i - row indices
2104 .  j - column indices
2105 -  a - matrix values
2106 
2107    Output Parameter:
2108 .  mat - the matrix
2109 
2110    Level: intermediate
2111 
2112    Notes:
2113        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2114     once the matrix is destroyed
2115 
2116        You cannot set new nonzero locations into this matrix, that will generate an error.
2117 
2118        The i and j indices are 0 based
2119 
2120 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2121 
2122 @*/
2123 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2124 {
2125   PetscErrorCode ierr;
2126   PetscInt       ii;
2127   Mat_SeqSBAIJ   *sbaij;
2128 
2129   PetscFunctionBegin;
2130   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2131   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2132 
2133   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2134   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2135   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2136   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2137   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2138   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2139 
2140   sbaij->i = i;
2141   sbaij->j = j;
2142   sbaij->a = a;
2143   sbaij->singlemalloc = PETSC_FALSE;
2144   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2145   sbaij->free_a       = PETSC_FALSE;
2146   sbaij->free_ij      = PETSC_FALSE;
2147 
2148   for (ii=0; ii<m; ii++) {
2149     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2150 #if defined(PETSC_USE_DEBUG)
2151     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2152 #endif
2153   }
2154 #if defined(PETSC_USE_DEBUG)
2155   for (ii=0; ii<sbaij->i[m]; ii++) {
2156     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2157     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2158   }
2159 #endif
2160 
2161   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 
2167 
2168 
2169 
2170